Skip to content

Commit 997cde1

Browse files
ooples authored and claude committed
perf: replace scalar column-extraction matmul with Engine.TensorMatMul
DevNetDetector: 2 hidden layers now use Engine.TensorMatMul + Engine.ReLU instead of per-column DotProduct loops with manual ReLU. NBEATSDetector: ForwardFC helper replaced with single Engine.TensorMatMul + Engine.TensorBroadcastAdd call. AnomalyTransformerDetector: feed-forward sublayer now uses Engine.TensorMatMul for both layers + Engine.ReLU, replacing scalar multiply-accumulate and column-extraction loops. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7077765 commit 997cde1

File tree

3 files changed

+41
-51
lines changed

3 files changed

+41
-51
lines changed

src/AnomalyDetection/NeuralNetwork/DevNetDetector.cs

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -393,27 +393,19 @@ private T Forward(Vector<T> x)
393393
throw new InvalidOperationException("Weights not initialized.");
394394
}
395395

396-
// Layer 1
397-
var h1 = new Vector<T>(_hiddenDim);
398-
for (int j = 0; j < _hiddenDim; j++)
399-
{
400-
T sum = b1[j];
401-
var wCol = new Vector<T>(_inputDim);
402-
for (int ii = 0; ii < _inputDim; ii++) wCol[ii] = w1[ii, j];
403-
sum = NumOps.Add(sum, Engine.DotProduct(x, wCol));
404-
h1[j] = NumOps.GreaterThan(sum, NumOps.Zero) ? sum : NumOps.Zero;
405-
}
406-
407-
// Layer 2 — reuse wCol for hidden dim
408-
var h2 = new Vector<T>(_hiddenDim);
409-
var wColH = new Vector<T>(_hiddenDim);
410-
for (int j = 0; j < _hiddenDim; j++)
411-
{
412-
T sum = b2[j];
413-
for (int ii = 0; ii < _hiddenDim; ii++) wColH[ii] = w2[ii, j];
414-
sum = NumOps.Add(sum, Engine.DotProduct(h1, wColH));
415-
h2[j] = NumOps.GreaterThan(sum, NumOps.Zero) ? sum : NumOps.Zero;
416-
}
396+
// Layer 1: h1 = ReLU(x @ W1 + b1) (SIMD via Engine)
397+
var xTensor = Tensor<T>.FromVector(x).Reshape(1, _inputDim);
398+
var h1Pre = Engine.TensorBroadcastAdd(
399+
Engine.TensorMatMul(xTensor, Tensor<T>.FromMatrix(w1)),
400+
Tensor<T>.FromVector(b1).Reshape(1, _hiddenDim));
401+
var h1 = Engine.ReLU(h1Pre.Reshape(_hiddenDim).ToVector());
402+
403+
// Layer 2: h2 = ReLU(h1 @ W2 + b2) (SIMD via Engine)
404+
var h1Tensor = Tensor<T>.FromVector(h1).Reshape(1, _hiddenDim);
405+
var h2Pre = Engine.TensorBroadcastAdd(
406+
Engine.TensorMatMul(h1Tensor, Tensor<T>.FromMatrix(w2)),
407+
Tensor<T>.FromVector(b2).Reshape(1, _hiddenDim));
408+
var h2 = Engine.ReLU(h2Pre.Reshape(_hiddenDim).ToVector());
417409

418410
// Output layer (linear)
419411
T score = b3[0];

src/AnomalyDetection/TimeSeries/AnomalyTransformerDetector.cs

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,29 +1052,31 @@ private Matrix<T> FeedForward(Matrix<T> x)
10521052
var h = new Vector<T>(ffDim);
10531053
var wCol = new Vector<T>(ffDim);
10541054

1055+
var w1Tensor = Tensor<T>.FromMatrix(W1);
1056+
var b1Tensor = Tensor<T>.FromVector(b1).Reshape(1, ffDim);
1057+
var w2Tensor = Tensor<T>.FromMatrix(W2);
1058+
var b2Tensor = Tensor<T>.FromVector(b2).Reshape(1, _modelDim);
1059+
10551060
for (int t = 0; t < seqLen; t++)
10561061
{
1057-
// First layer with ReLU
1058-
for (int j = 0; j < ffDim; j++)
1059-
{
1060-
T sum = b1[j];
1061-
for (int i = 0; i < _modelDim; i++)
1062-
{
1063-
sum = NumOps.Add(sum, NumOps.Multiply(x[t, i], W1[i, j]));
1064-
}
1065-
// ReLU
1066-
h[j] = NumOps.GreaterThan(sum, NumOps.Zero) ? sum : NumOps.Zero;
1067-
}
1068-
1069-
// Second layer with residual connection
1062+
// Extract row as vector for SIMD
1063+
var xRow = new Vector<T>(_modelDim);
1064+
for (int i = 0; i < _modelDim; i++) xRow[i] = x[t, i];
1065+
var xTensor = Tensor<T>.FromVector(xRow).Reshape(1, _modelDim);
1066+
1067+
// Layer 1: h = ReLU(x @ W1 + b1) (SIMD via Engine)
1068+
var h1Pre = Engine.TensorBroadcastAdd(
1069+
Engine.TensorMatMul(xTensor, w1Tensor), b1Tensor);
1070+
var hVec = Engine.ReLU(h1Pre.Reshape(ffDim).ToVector());
1071+
1072+
// Layer 2: out = h @ W2 + b2 (SIMD via Engine)
1073+
var hTensor = Tensor<T>.FromVector(hVec).Reshape(1, ffDim);
1074+
var l2Out = Engine.TensorBroadcastAdd(
1075+
Engine.TensorMatMul(hTensor, w2Tensor), b2Tensor).Reshape(_modelDim).ToVector();
1076+
1077+
// Residual connection
10701078
for (int j = 0; j < _modelDim; j++)
1071-
{
1072-
T sum = b2[j];
1073-
for (int ii = 0; ii < ffDim; ii++) wCol[ii] = W2[ii, j];
1074-
sum = NumOps.Add(sum, Engine.DotProduct(h, wCol));
1075-
// Residual connection
1076-
output[t, j] = NumOps.Add(sum, x[t, j]);
1077-
}
1079+
output[t, j] = NumOps.Add(l2Out[j], x[t, j]);
10781080
}
10791081

10801082
return output;

src/AnomalyDetection/TimeSeries/NBEATSDetector.cs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -857,17 +857,13 @@ private class BlockGradients
857857

858858
private Vector<T> ForwardFC(Vector<T> input, Matrix<T> W, Vector<T> b)
859859
{
860+
// SIMD: output = input @ W + b via Engine.TensorMatMul
860861
int outputSize = W.Columns;
861-
var output = new Vector<T>(outputSize);
862-
var wCol = new Vector<T>(input.Length);
863-
864-
for (int j = 0; j < outputSize; j++)
865-
{
866-
for (int i = 0; i < input.Length; i++) wCol[i] = W[i, j];
867-
output[j] = NumOps.Add(b[j], Engine.DotProduct(input, wCol));
868-
}
869-
870-
return output;
862+
var inputTensor = Tensor<T>.FromVector(input).Reshape(1, input.Length);
863+
var result = Engine.TensorBroadcastAdd(
864+
Engine.TensorMatMul(inputTensor, Tensor<T>.FromMatrix(W)),
865+
Tensor<T>.FromVector(b).Reshape(1, outputSize));
866+
return result.Reshape(outputSize).ToVector();
871867
}
872868

873869
private Vector<T> ForwardFCNoOffset(Vector<T> input, Matrix<T> W, int outputSize)

0 commit comments

Comments (0)