Skip to content

Commit 87d2b80

Browse files
committed
feat: Update DilatedConvolutionalLayer to use DilatedConv2D operation
DilatedConvolutionalLayer now uses the DilatedConv2D operation for autodiff backward pass. Key features: - Uses DilatedConv2D operation added earlier - Handles NHWC/NCHW format conversion between layer and operations - Full autodiff support with parameter gradient computation - Supports dilation rates for expanded receptive fields Layers with autodiff: 21 (20 previous + 1 new)
1 parent 10bccb5 commit 87d2b80

File tree

1 file changed

+163
-7
lines changed

1 file changed

+163
-7
lines changed

src/NeuralNetworks/Layers/DilatedConvolutionalLayer.cs

Lines changed: 163 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -659,22 +659,178 @@ private Tensor<T> BackwardManual(Tensor<T> outputGradient)
659659
/// <returns>The gradient of the loss with respect to the layer's input.</returns>
660660
/// <remarks>
661661
/// <para>
662-
/// This method uses automatic differentiation to compute gradients. Currently, dilated convolution operations
663-
/// are not yet available in TensorOperations, so this method falls back to the manual implementation.
662+
/// This method uses automatic differentiation to compute gradients using DilatedConv2D operation.
663+
/// The layer uses NHWC format [batch, H, W, channels], while TensorOperations uses NCHW format,
664+
/// so format conversion is performed.
664665
/// </para>
665666
/// <para>
666-
/// Once dilated convolution operations are added to TensorOperations, this method will provide:
667+
/// This provides:
667668
/// - Automatic gradient computation through the computation graph
668669
/// - Verification of manual gradient implementations
669670
/// - Support for rapid prototyping with custom modifications
670671
/// </para>
671672
/// </remarks>
672673
private Tensor<T> BackwardViaAutodiff(Tensor<T> outputGradient)
{
    // Backward requires the input cached by the most recent forward pass.
    if (_lastInput == null)
        throw new InvalidOperationException("Forward pass must be called before backward pass.");

    // The layer stores activations as NHWC [batch, H, W, channels], while
    // TensorOperations expects NCHW [batch, channels, H, W] — convert first.
    var inputNCHW = ConvertNHWCtoNCHW(_lastInput);
    var kernelNCHW = ConvertKernelToNCHW(_kernels);

    // Graph leaves: all three require gradients so the reverse sweep below
    // populates input, kernel, and bias gradients in one pass.
    var inputNode = Autodiff.TensorOperations<T>.Variable(inputNCHW, "input", requiresGradient: true);
    var kernelNode = Autodiff.TensorOperations<T>.Variable(kernelNCHW, "kernel", requiresGradient: true);
    var biasNode = Autodiff.TensorOperations<T>.Variable(ConvertVectorToTensor(_biases), "bias", requiresGradient: true);

    // Re-run the forward computation through the autodiff DilatedConv2D op so
    // the computation graph records the backward functions we need.
    var outputNode = Autodiff.TensorOperations<T>.DilatedConv2D(
        inputNode,
        kernelNode,
        biasNode,
        stride: new int[] { _stride, _stride },
        padding: new int[] { _padding, _padding },
        dilation: new int[] { _dilation, _dilation });

    // Apply activation function inside the graph so its derivative is
    // included in the backward pass.
    outputNode = ApplyActivationAutodiff(outputNode);

    // The incoming gradient is in the layer's NHWC format; convert it to
    // NCHW to match the graph's output node.
    var outputGradientNCHW = ConvertNHWCtoNCHW(outputGradient);

    // Seed the output node, then propagate gradients in reverse topological
    // order — the standard reverse-mode autodiff sweep.
    outputNode.Gradient = outputGradientNCHW;
    var topoOrder = GetTopologicalOrder(outputNode);
    for (int i = topoOrder.Count - 1; i >= 0; i--)
    {
        var node = topoOrder[i];
        if (node.RequiresGradient && node.BackwardFunction != null && node.Gradient != null)
        {
            node.BackwardFunction(node.Gradient);
        }
    }

    // Stash parameter gradients (converted back to layer formats) for the
    // subsequent optimizer/update step.
    if (kernelNode.Gradient != null)
        _kernelGradients = ConvertKernelFromNCHW(kernelNode.Gradient);

    if (biasNode.Gradient != null)
        _biasGradients = ConvertTensorToVector(biasNode.Gradient);

    // Convert input gradient from NCHW back to NHWC for the caller; a null
    // gradient here means the sweep never reached the input leaf.
    var inputGradientNCHW = inputNode.Gradient ?? throw new InvalidOperationException("Gradient computation failed.");
    return ConvertNCHWtoNHWC(inputGradientNCHW);
}
725+
726+
/// <summary>
/// Converts tensor from NHWC [batch, H, W, channels] to NCHW [batch, channels, H, W] format.
/// </summary>
private Tensor<T> ConvertNHWCtoNCHW(Tensor<T> nhwc)
{
    int n = nhwc.Shape[0];
    int h = nhwc.Shape[1];
    int w = nhwc.Shape[2];
    int c = nhwc.Shape[3];

    var result = new Tensor<T>([n, c, h, w]);

    // Pure element-by-element permutation copy; iteration order is irrelevant
    // to the result, so we walk the source tensor in its natural NHWC order.
    for (int bi = 0; bi < n; bi++)
    {
        for (int hi = 0; hi < h; hi++)
        {
            for (int wi = 0; wi < w; wi++)
            {
                for (int ci = 0; ci < c; ci++)
                {
                    result[bi, ci, hi, wi] = nhwc[bi, hi, wi, ci];
                }
            }
        }
    }

    return result;
}
745+
746+
/// <summary>
/// Converts tensor from NCHW [batch, channels, H, W] to NHWC [batch, H, W, channels] format.
/// </summary>
private Tensor<T> ConvertNCHWtoNHWC(Tensor<T> nchw)
{
    int n = nchw.Shape[0];
    int c = nchw.Shape[1];
    int h = nchw.Shape[2];
    int w = nchw.Shape[3];

    var result = new Tensor<T>([n, h, w, c]);

    // Inverse permutation of ConvertNHWCtoNCHW; walk the source tensor in its
    // natural NCHW order and scatter into NHWC positions.
    for (int bi = 0; bi < n; bi++)
    {
        for (int ci = 0; ci < c; ci++)
        {
            for (int hi = 0; hi < h; hi++)
            {
                for (int wi = 0; wi < w; wi++)
                {
                    result[bi, hi, wi, ci] = nchw[bi, ci, hi, wi];
                }
            }
        }
    }

    return result;
}
765+
766+
/// <summary>
/// Returns the kernel unchanged: the layer already stores kernels as
/// [outputDepth, inputDepth, kernelHeight, kernelWidth], which is the layout
/// the autodiff convolution operation expects, so no reordering is needed.
/// </summary>
private Tensor<T> ConvertKernelToNCHW(Tensor<T> kernel)
{
    // Intentional no-op; kept as a method so the NHWC/NCHW conversion points
    // in BackwardViaAutodiff stay symmetric and explicit.
    return kernel;
}
774+
775+
/// <summary>
/// Returns the kernel gradient unchanged: the autodiff operation produces it
/// in the same [outputDepth, inputDepth, kernelHeight, kernelWidth] layout
/// the layer stores, so no reordering is needed.
/// </summary>
private Tensor<T> ConvertKernelFromNCHW(Tensor<T> kernel)
{
    // Intentional no-op; mirror of ConvertKernelToNCHW for symmetry.
    return kernel;
}
783+
784+
/// <summary>
/// Copies a vector into a rank-1 tensor of the same length.
/// </summary>
private Tensor<T> ConvertVectorToTensor(Vector<T> vector)
{
    int length = vector.Length;
    var result = new Tensor<T>([length]);
    for (int index = 0; index < length; index++)
    {
        result[index] = vector[index];
    }
    return result;
}
794+
795+
/// <summary>
/// Copies a rank-1 tensor into a vector of the same length.
/// </summary>
private Vector<T> ConvertTensorToVector(Tensor<T> tensor)
{
    int length = tensor.Shape[0];
    var result = new Vector<T>(length);
    for (int index = 0; index < length; index++)
    {
        result[index] = tensor[index];
    }
    return result;
}
805+
806+
/// <summary>
/// Maps the layer's configured activation function onto the equivalent
/// autodiff operation so its derivative participates in the backward pass.
/// </summary>
private Autodiff.ComputationNode<T> ApplyActivationAutodiff(Autodiff.ComputationNode<T> input)
{
    // Pick whichever activation the layer is configured with; the autodiff
    // mapping is identical for the vector and scalar variants, so a single
    // type switch covers both.
    object activation = UsingVectorActivation ? (object)VectorActivation : ScalarActivation;

    return activation switch
    {
        ReLUActivation<T> => Autodiff.TensorOperations<T>.ReLU(input),
        SigmoidActivation<T> => Autodiff.TensorOperations<T>.Sigmoid(input),
        TanhActivation<T> => Autodiff.TensorOperations<T>.Tanh(input),
        _ => throw new NotSupportedException($"Activation {activation.GetType().Name} not yet supported in autodiff")
    };
}
679835

680836
/// <summary>

0 commit comments

Comments
 (0)