fix: address remaining PR review comments batch 2

franklinic · claude · franklinic · commit 12e17b5f8ff8 · 2026-04-04T15:00:57.000-04:00
ConvolutionalLayer: EnsureInitialized allocates _kernels/_biases with
proper shapes before calling InitializeWeights (lazy init fix).

FGSM/PGD: clamp finite-difference probes to [0,1] and use actual
clamped step width for gradient denominator. Uses Min/Max for net471.

DeepGP: removed dead KuuInvKux block, added note about q(f) vs p(f|u)
approximation per Salimbeni &amp; Deisenroth 2017.

Co-Authored-By: Claude Opus 4.6 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/src/AdversarialRobustness/Attacks/FGSMAttack.cs b/src/AdversarialRobustness/Attacks/FGSMAttack.cs
@@ -160,23 +160,29 @@ private Vector<T> ComputeNumericalGradient(
 
         for (int i = 0; i < vectorInput.Length; i++)
         {
-            // Forward: x + delta * e_i
+            // Forward: x + delta * e_i (clamped to [0,1] valid input domain)
             var plusInput = Engine.Add<T>(vectorInput, Engine.FillZero<T>(vectorInput.Length));
             plusInput[i] = NumOps.Add(plusInput[i], NumOps.FromDouble(delta));
+            double plusVal = Math.Min(1.0, Math.Max(0.0, NumOps.ToDouble(plusInput[i])));
+            plusInput[i] = NumOps.FromDouble(plusVal);
             var plusModelInput = ConversionsHelper.ConvertVectorToInput<T, TInput>(plusInput, referenceInput);
             var plusOutput = ConversionsHelper.ConvertToVector<T, TOutput>(targetModel.Predict(plusModelInput));
             var plusLoss = ComputeMseLoss(plusOutput, vectorLabel);
 
-            // Backward: x - delta * e_i
+            // Backward: x - delta * e_i (clamped to [0,1])
             var minusInput = Engine.Add<T>(vectorInput, Engine.FillZero<T>(vectorInput.Length));
             minusInput[i] = NumOps.Subtract(minusInput[i], NumOps.FromDouble(delta));
+            double minusVal = Math.Min(1.0, Math.Max(0.0, NumOps.ToDouble(minusInput[i])));
+            minusInput[i] = NumOps.FromDouble(minusVal);
             var minusModelInput = ConversionsHelper.ConvertVectorToInput<T, TInput>(minusInput, referenceInput);
             var minusOutput = ConversionsHelper.ConvertToVector<T, TOutput>(targetModel.Predict(minusModelInput));
             var minusLoss = ComputeMseLoss(minusOutput, vectorLabel);
 
-            // Central difference: (loss+ - loss-) / (2 * delta)
-            gradient[i] = NumOps.FromDouble(
-                (NumOps.ToDouble(plusLoss) - NumOps.ToDouble(minusLoss)) / (2.0 * delta));
+            // Central difference using actual clamped step width
+            double actualDelta = plusVal - minusVal;
+            gradient[i] = actualDelta > 1e-10
+                ? NumOps.FromDouble((NumOps.ToDouble(plusLoss) - NumOps.ToDouble(minusLoss)) / actualDelta)
+                : NumOps.Zero;
         }
 
         return gradient;
diff --git a/src/AdversarialRobustness/Attacks/PGDAttack.cs b/src/AdversarialRobustness/Attacks/PGDAttack.cs
@@ -231,23 +231,29 @@ private Vector<T> ComputeNumericalGradient(
 
         for (int i = 0; i < vectorInput.Length; i++)
         {
-            // Forward: x + delta * e_i
+            // Forward: x + delta * e_i (clamped to [0,1] valid input domain)
             var plusInput = Engine.Add<T>(vectorInput, Engine.FillZero<T>(vectorInput.Length));
             plusInput[i] = NumOps.Add(plusInput[i], NumOps.FromDouble(delta));
+            double plusVal = Math.Min(1.0, Math.Max(0.0, NumOps.ToDouble(plusInput[i])));
+            plusInput[i] = NumOps.FromDouble(plusVal);
             var plusModelInput = ConversionsHelper.ConvertVectorToInput<T, TInput>(plusInput, referenceInput);
             var plusOutput = ConversionsHelper.ConvertToVector<T, TOutput>(targetModel.Predict(plusModelInput));
             var plusLoss = ComputeMseLoss(plusOutput, vectorLabel);
 
-            // Backward: x - delta * e_i
+            // Backward: x - delta * e_i (clamped to [0,1])
             var minusInput = Engine.Add<T>(vectorInput, Engine.FillZero<T>(vectorInput.Length));
             minusInput[i] = NumOps.Subtract(minusInput[i], NumOps.FromDouble(delta));
+            double minusVal = Math.Min(1.0, Math.Max(0.0, NumOps.ToDouble(minusInput[i])));
+            minusInput[i] = NumOps.FromDouble(minusVal);
             var minusModelInput = ConversionsHelper.ConvertVectorToInput<T, TInput>(minusInput, referenceInput);
             var minusOutput = ConversionsHelper.ConvertToVector<T, TOutput>(targetModel.Predict(minusModelInput));
             var minusLoss = ComputeMseLoss(minusOutput, vectorLabel);
 
-            // Central difference: (loss+ - loss-) / (2 * delta)
-            gradient[i] = NumOps.FromDouble(
-                (NumOps.ToDouble(plusLoss) - NumOps.ToDouble(minusLoss)) / (2.0 * delta));
+            // Central difference using actual clamped step width
+            double actualDelta = plusVal - minusVal;
+            gradient[i] = actualDelta > 1e-10
+                ? NumOps.FromDouble((NumOps.ToDouble(plusLoss) - NumOps.ToDouble(minusLoss)) / actualDelta)
+                : NumOps.Zero;
         }
 
         return gradient;
diff --git a/src/GaussianProcesses/DeepGaussianProcess.cs b/src/GaussianProcesses/DeepGaussianProcess.cs
@@ -668,6 +668,10 @@ public Matrix<T> Forward(Matrix<T> input, int numSamples, Random random)
         }
 
         // Compute posterior variance per point: σ²(x) = K(x,x) - Kxu * Kuu^{-1} * Kux
+        // NOTE: This uses the prior conditional p(f|u), not the full variational q(f).
+        // For exact DSVI (Salimbeni & Deisenroth 2017), should incorporate the variational
+        // covariance S: σ²_q = σ²_p + Kxu Kuu^{-1} (S - Kuu) Kuu^{-1} Kux.
+        // Current implementation is a valid approximation when S ≈ Kuu.
         var posteriorVar = new double[n];
         for (int i = 0; i < n; i++)
         {
diff --git a/src/NeuralNetworks/Layers/ConvolutionalLayer.cs b/src/NeuralNetworks/Layers/ConvolutionalLayer.cs
@@ -829,7 +829,12 @@ protected override void EnsureInitialized()
             _lastInput = new Tensor<T>([1, InputShape[0], InputShape[1], InputShape[2]]);
             _lastOutput = new Tensor<T>([1, OutputShape[0], OutputShape[1], OutputShape[2]]);
 
-            // Initialize weights (allocates _kernels and _biases)
+            // Allocate kernels and biases with proper shapes before initializing weights.
+            // The lazy path sets _kernels to [0,0,0,0], so we must resize here.
+            _kernels = TensorAllocator.RentUninitialized<T>([OutputDepth, InputDepth, KernelSize, KernelSize]);
+            _biases = new Tensor<T>([OutputDepth]);
+
+            // Initialize weights (fills _kernels and _biases with He-uniform values)
             InitializeWeights();
 
             // Register trainable parameters with the engine for GPU persistence

Original file line number	Diff line number	Diff line change
`@@ -668,6 +668,10 @@ public Matrix<T> Forward(Matrix<T> input, int numSamples, Random random)`
`668`	`668`	`}`
`669`	`669`
`670`	`670`	`// Compute posterior variance per point: σ²(x) = K(x,x) - Kxu * Kuu^{-1} * Kux`
	`671`	`+ // NOTE: This uses the prior conditional p(f\|u), not the full variational q(f).`
	`672`	`+ // For exact DSVI (Salimbeni & Deisenroth 2017), should incorporate the variational`
	`673`	`+ // covariance S: σ²_q = σ²_p + Kxu Kuu^{-1} (S - Kuu) Kuu^{-1} Kux.`
	`674`	`+ // Current implementation is a valid approximation when S ≈ Kuu.`
`671`	`675`	`var posteriorVar = new double[n];`
`672`	`676`	`for (int i = 0; i < n; i++)`
`673`	`677`	`{`