@@ -659,22 +659,178 @@ private Tensor<T> BackwardManual(Tensor<T> outputGradient)
659659 /// <returns>The gradient of the loss with respect to the layer's input.</returns>
660660 /// <remarks>
661661 /// <para>
662- /// This method uses automatic differentiation to compute gradients. Currently, dilated convolution operations
663- /// are not yet available in TensorOperations, so this method falls back to the manual implementation.
662+ /// This method computes gradients via automatic differentiation, using the DilatedConv2D operation.
663+ /// The layer uses NHWC format [batch, H, W, channels], while TensorOperations uses NCHW format,
664+ /// so format conversion is performed.
664665 /// </para>
665666 /// <para>
666- /// Once dilated convolution operations are added to TensorOperations, this method will provide :
667+ /// This provides:
667668 /// - Automatic gradient computation through the computation graph
668669 /// - Verification of manual gradient implementations
669670 /// - Support for rapid prototyping with custom modifications
670671 /// </para>
671672 /// </remarks>
private Tensor<T> BackwardViaAutodiff(Tensor<T> outputGradient)
{
    // A backward pass is only meaningful after Forward has cached the input.
    if (_lastInput == null)
        throw new InvalidOperationException("Forward pass must be called before backward pass.");

    // The layer stores activations in NHWC [batch, H, W, channels], but the
    // autodiff TensorOperations work in NCHW [batch, channels, H, W] — convert first.
    var inputNCHW = ConvertNHWCtoNCHW(_lastInput);
    var kernelNCHW = ConvertKernelToNCHW(_kernels);

    // Wrap input, kernel, and bias as gradient-tracking leaves of the computation graph.
    var inputNode = Autodiff.TensorOperations<T>.Variable(inputNCHW, "input", requiresGradient: true);
    var kernelNode = Autodiff.TensorOperations<T>.Variable(kernelNCHW, "kernel", requiresGradient: true);
    var biasNode = Autodiff.TensorOperations<T>.Variable(ConvertVectorToTensor(_biases), "bias", requiresGradient: true);

    // Re-run the forward convolution through autodiff so the graph records
    // the same stride/padding/dilation configuration the layer uses.
    var outputNode = Autodiff.TensorOperations<T>.DilatedConv2D(
        inputNode,
        kernelNode,
        biasNode,
        stride: new int[] { _stride, _stride },
        padding: new int[] { _padding, _padding },
        dilation: new int[] { _dilation, _dilation });

    // Activation must be part of the graph so its derivative is included in backprop.
    outputNode = ApplyActivationAutodiff(outputNode);

    // Incoming gradient arrives in NHWC; convert to match the graph's NCHW output.
    var outputGradientNCHW = ConvertNHWCtoNCHW(outputGradient);

    // Seed the output node, then propagate gradients through the graph by
    // visiting nodes in reverse topological order (children before parents).
    outputNode.Gradient = outputGradientNCHW;
    var topoOrder = GetTopologicalOrder(outputNode);
    for (int i = topoOrder.Count - 1; i >= 0; i--)
    {
        var node = topoOrder[i];
        if (node.RequiresGradient && node.BackwardFunction != null && node.Gradient != null)
        {
            node.BackwardFunction(node.Gradient);
        }
    }

    // Harvest parameter gradients accumulated on the leaf nodes.
    if (kernelNode.Gradient != null)
        _kernelGradients = ConvertKernelFromNCHW(kernelNode.Gradient);

    if (biasNode.Gradient != null)
        _biasGradients = ConvertTensorToVector(biasNode.Gradient);

    // The input gradient is the layer's return value; a missing gradient here
    // means backprop never reached the input leaf, which is a hard failure.
    var inputGradientNCHW = inputNode.Gradient ?? throw new InvalidOperationException("Gradient computation failed.");
    return ConvertNCHWtoNHWC(inputGradientNCHW);
}
725+
/// <summary>
/// Rearranges a tensor from NHWC layout [batch, H, W, channels] into NCHW layout [batch, channels, H, W].
/// </summary>
/// <param name="nhwc">Input tensor in NHWC layout.</param>
/// <returns>A new tensor holding the same values in NCHW layout.</returns>
private Tensor<T> ConvertNHWCtoNCHW(Tensor<T> nhwc)
{
    int batchCount = nhwc.Shape[0];
    int rows = nhwc.Shape[1];
    int cols = nhwc.Shape[2];
    int channelCount = nhwc.Shape[3];

    var result = new Tensor<T>([batchCount, channelCount, rows, cols]);

    // Pure element-by-element permutation of axes; no values are modified.
    for (int n = 0; n < batchCount; n++)
    {
        for (int y = 0; y < rows; y++)
        {
            for (int x = 0; x < cols; x++)
            {
                for (int ch = 0; ch < channelCount; ch++)
                {
                    result[n, ch, y, x] = nhwc[n, y, x, ch];
                }
            }
        }
    }

    return result;
}
745+
/// <summary>
/// Rearranges a tensor from NCHW layout [batch, channels, H, W] into NHWC layout [batch, H, W, channels].
/// </summary>
/// <param name="nchw">Input tensor in NCHW layout.</param>
/// <returns>A new tensor holding the same values in NHWC layout.</returns>
private Tensor<T> ConvertNCHWtoNHWC(Tensor<T> nchw)
{
    int batchCount = nchw.Shape[0];
    int channelCount = nchw.Shape[1];
    int rows = nchw.Shape[2];
    int cols = nchw.Shape[3];

    var result = new Tensor<T>([batchCount, rows, cols, channelCount]);

    // Inverse of ConvertNHWCtoNCHW: copies each element to its transposed position.
    for (int n = 0; n < batchCount; n++)
    {
        for (int ch = 0; ch < channelCount; ch++)
        {
            for (int y = 0; y < rows; y++)
            {
                for (int x = 0; x < cols; x++)
                {
                    result[n, y, x, ch] = nchw[n, ch, y, x];
                }
            }
        }
    }

    return result;
}
765+
/// <summary>
/// Returns the kernel tensor for use with the NCHW-format autodiff operation.
/// </summary>
/// <remarks>
/// The kernels appear to already be stored as [outputDepth, inputDepth, kernelHeight, kernelWidth],
/// which is the layout DilatedConv2D consumes, so no axis permutation is performed and the
/// same tensor instance is returned. NOTE(review): confirm this matches the storage layout
/// of <c>_kernels</c> established elsewhere in this class.
/// </remarks>
/// <param name="kernel">The layer's kernel tensor.</param>
/// <returns>The same tensor instance, unchanged.</returns>
private Tensor<T> ConvertKernelToNCHW(Tensor<T> kernel)
{
    // Already in the correct format
    return kernel;
}
774+
/// <summary>
/// Returns a kernel-shaped gradient tensor coming out of the NCHW autodiff graph.
/// </summary>
/// <remarks>
/// Counterpart of <see cref="ConvertKernelToNCHW"/>: since that conversion is a pass-through,
/// no axis permutation is needed here either and the same tensor instance is returned.
/// </remarks>
/// <param name="kernel">Kernel gradient tensor produced by the autodiff backward pass.</param>
/// <returns>The same tensor instance, unchanged.</returns>
private Tensor<T> ConvertKernelFromNCHW(Tensor<T> kernel)
{
    // Already in the correct format
    return kernel;
}
783+
/// <summary>
/// Copies a vector into a rank-1 tensor of the same length.
/// </summary>
/// <param name="vector">The source vector.</param>
/// <returns>A new 1-D tensor containing the vector's elements in order.</returns>
private Tensor<T> ConvertVectorToTensor(Vector<T> vector)
{
    int length = vector.Length;
    var result = new Tensor<T>([length]);
    for (int index = 0; index < length; index++)
    {
        result[index] = vector[index];
    }

    return result;
}
794+
/// <summary>
/// Copies a rank-1 tensor into a vector of the same length.
/// </summary>
/// <param name="tensor">The source tensor; only its first dimension is read.</param>
/// <returns>A new vector containing the tensor's elements in order.</returns>
private Vector<T> ConvertTensorToVector(Tensor<T> tensor)
{
    int length = tensor.Shape[0];
    var result = new Vector<T>(length);
    for (int index = 0; index < length; index++)
    {
        result[index] = tensor[index];
    }

    return result;
}
805+
/// <summary>
/// Applies the layer's configured activation function using autodiff operations.
/// </summary>
/// <param name="input">The computation node to apply the activation to.</param>
/// <returns>A new computation node representing the activated output.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when the configured activation has no autodiff equivalent yet.
/// </exception>
/// <remarks>
/// The vector and scalar branches previously duplicated the same type-dispatch chain;
/// both dispatch identically, so the active activation object is selected once and
/// matched a single time. Runtime behavior (including the exception message, which
/// uses the activation's runtime type name) is unchanged.
/// </remarks>
private Autodiff.ComputationNode<T> ApplyActivationAutodiff(Autodiff.ComputationNode<T> input)
{
    // Pick whichever activation the layer is actually configured with.
    object activation = UsingVectorActivation ? (object)VectorActivation : ScalarActivation;

    return activation switch
    {
        ReLUActivation<T> => Autodiff.TensorOperations<T>.ReLU(input),
        SigmoidActivation<T> => Autodiff.TensorOperations<T>.Sigmoid(input),
        TanhActivation<T> => Autodiff.TensorOperations<T>.Tanh(input),
        _ => throw new NotSupportedException($"Activation {activation.GetType().Name} not yet supported in autodiff")
    };
}
679835
680836 /// <summary>
0 commit comments