Skip to content

Commit d8673da

Browse files
ooples authored and claude committed
fix: address 25 PR review comments — SiLU zero-alloc, arena safety, cleanup
SiLU derivative optimization (18 files):
- Replaced ones_t tensor allocation + Fill(One) + TensorAdd pattern with Engine.TensorAddScalar(x, NumOps.One) — zero allocation for 1+x computation
- ABCLayer, GatedDeltaNet, GatedDeltaProduct, GatedSlotAttention, HGRN2, Hedgehog, Hyena, Kimi, LogLinear, MesaNet, MixtureOfMamba/Memories, MultiLatentAttention, Rebased, RetNet, Rodimus, TransNormerLLM, TTT

ForwardArena fixes:
- Reset() uses pre-cached keys array instead of allocating List each call
- ShapeKey defensive-copies the shape array to prevent mutation
- Made ForwardArena internal (implementation detail, not public API)

Other fixes:
- RWKV7Block: removed unused TsYt constant
- DiffusionConvLayer: removed duplicate using directive
- S6Scan: eliminated wasted Rent in initialState branch (clone instead)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 554b23f commit d8673da

22 files changed: +32 −41 lines changed

src/Memory/ForwardArena.cs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ namespace AiDotNet.Memory;
1111
/// and GC pressure during the forward pass. Tensors are pre-created during warmup
1212
/// and recycled across calls.
1313
/// </summary>
14-
public sealed class ForwardArena<T>
14+
internal sealed class ForwardArena<T>
1515
{
1616
private readonly Dictionary<ShapeKey, Tensor<T>[]> _slabs = new();
1717
private readonly Dictionary<ShapeKey, int> _cursors = new();
18+
private ShapeKey[]? _cursorKeysCache;
1819
private const int DefaultSlabSize = 4;
1920
private const int GrowthFactor = 2;
2021

@@ -71,9 +72,12 @@ public Tensor<T> RentUninitialized(int[] shape)
7172
[MethodImpl(MethodImplOptions.AggressiveInlining)]
7273
public void Reset()
7374
{
74-
var keys = new List<ShapeKey>(_cursors.Keys);
75-
foreach (var key in keys)
76-
_cursors[key] = 0;
75+
// Use cached keys array to avoid allocation during reset
76+
if (_cursorKeysCache is null || _cursorKeysCache.Length != _cursors.Count)
77+
_cursorKeysCache = new ShapeKey[_cursors.Count];
78+
_cursors.Keys.CopyTo(_cursorKeysCache, 0);
79+
for (int i = 0; i < _cursorKeysCache.Length; i++)
80+
_cursors[_cursorKeysCache[i]] = 0;
7781
}
7882

7983
/// <summary>
@@ -136,7 +140,7 @@ private Tensor<T> GrowAndRent(ShapeKey key, int[] shape, bool clear = true)
136140

137141
public ShapeKey(int[] shape)
138142
{
139-
_dims = shape;
143+
_dims = (int[])shape.Clone();
140144
unchecked
141145
{
142146
int hash = (int)2166136261;

src/NeuralNetworks/Layers/DiffusionConvLayer.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
using AiDotNet.Enums.AlgorithmTypes;
66
using AiDotNet.Interfaces;
77
using AiDotNet.Memory;
8-
using AiDotNet.Interfaces;
98
using AiDotNet.Tensors.Engines;
109
using AiDotNet.Tensors.Engines.DirectGpu;
1110
using AiDotNet.Tensors.Engines.Gpu;

src/NeuralNetworks/Layers/SSM/ABCLayer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
478478
var sig = Engine.Sigmoid(x);
479479
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
480480
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
481-
var onePlusXSig = Engine.TensorAdd(CreateOnesLike(xTimesOneMinusSig), xTimesOneMinusSig);
481+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
482482
return Engine.TensorMultiply(sig, onePlusXSig);
483483
}
484484

src/NeuralNetworks/Layers/SSM/GatedDeltaNetLayer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
470470
var sig = Engine.Sigmoid(x);
471471
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
472472
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
473-
var onePlusXSig = Engine.TensorAdd(CreateOnesLike(xTimesOneMinusSig), xTimesOneMinusSig);
473+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
474474
return Engine.TensorMultiply(sig, onePlusXSig);
475475
}
476476

src/NeuralNetworks/Layers/SSM/GatedDeltaProductLayer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
609609
var sig = Engine.Sigmoid(x);
610610
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
611611
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
612-
var onePlusXSig = Engine.TensorAdd(CreateOnesLike(xTimesOneMinusSig), xTimesOneMinusSig);
612+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
613613
return Engine.TensorMultiply(sig, onePlusXSig);
614614
}
615615

src/NeuralNetworks/Layers/SSM/GatedSlotAttentionLayer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
446446
var sig = Engine.Sigmoid(x);
447447
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
448448
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
449-
var onePlusXSig = Engine.TensorAdd(CreateOnesLike(xTimesOneMinusSig), xTimesOneMinusSig);
449+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
450450
return Engine.TensorMultiply(sig, onePlusXSig);
451451
}
452452

src/NeuralNetworks/Layers/SSM/HGRN2Layer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
392392
var sig = Engine.Sigmoid(x);
393393
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
394394
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
395-
var onePlusXSig = Engine.TensorAdd(CreateOnesLike(xTimesOneMinusSig), xTimesOneMinusSig);
395+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
396396
return Engine.TensorMultiply(sig, onePlusXSig);
397397
}
398398

src/NeuralNetworks/Layers/SSM/HedgehogLayer.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
576576
var sig = Engine.Sigmoid(x);
577577
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
578578
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
579-
var ones_t = new Tensor<T>(xTimesOneMinusSig.Shape.ToArray()); ones_t.Fill(NumOps.One);
580-
var onePlusXSig = Engine.TensorAdd(ones_t, xTimesOneMinusSig);
579+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
581580
return Engine.TensorMultiply(sig, onePlusXSig);
582581
}
583582

src/NeuralNetworks/Layers/SSM/HyenaLayer.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -479,8 +479,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
479479
var sig = Engine.Sigmoid(x);
480480
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
481481
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
482-
var onesT = new Tensor<T>(xTimesOneMinusSig.Shape.ToArray()); onesT.Fill(NumOps.One);
483-
var onePlusXSig = Engine.TensorAdd(onesT, xTimesOneMinusSig);
482+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
484483
return Engine.TensorMultiply(sig, onePlusXSig);
485484
}
486485

src/NeuralNetworks/Layers/SSM/KimiLinearAttentionLayer.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,7 @@ private Tensor<T> ComputeSiLUDerivative(Tensor<T> x)
373373
var sig = Engine.Sigmoid(x);
374374
var oneMinusSig = Engine.ScalarMinusTensor(NumOps.One, sig);
375375
var xTimesOneMinusSig = Engine.TensorMultiply(x, oneMinusSig);
376-
var ones_t = new Tensor<T>(xTimesOneMinusSig.Shape.ToArray()); ones_t.Fill(NumOps.One);
377-
var onePlusXSig = Engine.TensorAdd(ones_t, xTimesOneMinusSig);
376+
var onePlusXSig = Engine.TensorAddScalar(xTimesOneMinusSig, NumOps.One);
378377
return Engine.TensorMultiply(sig, onePlusXSig);
379378
}
380379

0 commit comments

Comments (0)