@@ -3843,5 +3843,258 @@ void BackwardFunction(Tensor<T> gradient)
38433843
38443844 return node ;
38453845 }
3846+
/// <summary>
/// Performs depthwise 2D convolution where each input channel is convolved with its own set of filters.
/// </summary>
/// <param name="input">Input tensor of shape [batch, in_channels, height, width]</param>
/// <param name="kernel">Kernel tensor of shape [in_channels, multiplier, kernel_height, kernel_width]</param>
/// <param name="bias">Optional bias tensor of shape [in_channels * multiplier]</param>
/// <param name="stride">Stride for the convolution as [height, width]; defaults to [1, 1]. Both values must be positive.</param>
/// <param name="padding">Zero padding for the convolution as [height, width]; defaults to [0, 0]. Both values must be non-negative.</param>
/// <returns>Output tensor of shape [batch, in_channels * multiplier, out_height, out_width]</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="input"/> or <paramref name="kernel"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when a tensor has the wrong rank, the channel counts of input and kernel disagree,
/// stride or padding are not two-element arrays, a stride is not positive, a padding is negative,
/// the kernel is larger than the padded input, or the bias shape does not match the output channels.
/// </exception>
/// <remarks>
/// <para>
/// Depthwise convolution applies a separate filter to each input channel independently, with no mixing
/// across channels. This is in contrast to standard convolution which mixes all input channels.
/// Each input channel gets 'multiplier' filters applied to it, producing 'multiplier' output channels.
/// The total output channels is in_channels * multiplier, and the output channel produced by
/// input channel c with filter m is located at index c * multiplier + m.
/// </para>
/// <para>
/// This operation is commonly used in MobileNets and other efficient architectures, often followed
/// by a pointwise (1x1) convolution to mix channels.
/// </para>
/// <para>
/// The output size along each spatial axis is (in + 2 * pad - kernel) / stride + 1 using integer division.
/// Positions that fall into the padding region contribute zero to the sum.
/// </para>
/// <para>
/// The backward pass accumulates (adds) gradients into the Gradient tensors of input, kernel, and bias
/// for every parent that requires a gradient, allocating a zero tensor first when none exists yet.
/// </para>
/// </remarks>
public static ComputationNode<T> DepthwiseConv2D(
    ComputationNode<T> input,
    ComputationNode<T> kernel,
    ComputationNode<T>? bias = null,
    int[]? stride = null,
    int[]? padding = null)
{
    if (input == null)
        throw new ArgumentNullException(nameof(input));
    if (kernel == null)
        throw new ArgumentNullException(nameof(kernel));

    var numOps = MathHelper.GetNumericOperations<T>();
    var inputShape = input.Value.Shape;
    var kernelShape = kernel.Value.Shape;

    // Validate input shape (must be 4D: [batch, in_channels, height, width])
    if (inputShape.Length != 4)
        throw new ArgumentException("Input must be 4D tensor [batch, in_channels, height, width]");

    // Validate kernel shape (must be 4D: [in_channels, multiplier, kernel_height, kernel_width])
    if (kernelShape.Length != 4)
        throw new ArgumentException("Kernel must be 4D tensor [in_channels, multiplier, kernel_height, kernel_width]");

    if (inputShape[1] != kernelShape[0])
        throw new ArgumentException($"Input channels ({inputShape[1]}) must match kernel input channels ({kernelShape[0]})");

    // Default stride and padding
    stride ??= new int[] { 1, 1 };
    padding ??= new int[] { 0, 0 };

    if (stride.Length != 2 || padding.Length != 2)
        throw new ArgumentException("Stride and padding must be 2D arrays [height, width]");

    int batch = inputShape[0];
    int inChannels = inputShape[1];
    int inHeight = inputShape[2];
    int inWidth = inputShape[3];
    int multiplier = kernelShape[1];
    int kernelHeight = kernelShape[2];
    int kernelWidth = kernelShape[3];
    int strideH = stride[0];
    int strideW = stride[1];
    int padH = padding[0];
    int padW = padding[1];

    // A zero stride would divide by zero below and a negative one yields meaningless sizes.
    if (strideH <= 0 || strideW <= 0)
        throw new ArgumentException($"Stride values must be positive, got [{strideH}, {strideW}]", nameof(stride));

    if (padH < 0 || padW < 0)
        throw new ArgumentException($"Padding values must be non-negative, got [{padH}, {padW}]", nameof(padding));

    // Integer division truncates toward zero, so without this check a kernel that does not fit
    // (negative numerator) could still produce an output size of 1 when the stride is > 1,
    // or a zero/negative tensor dimension when the stride is 1.
    int paddedHeight = inHeight + 2 * padH;
    int paddedWidth = inWidth + 2 * padW;
    if (kernelHeight > paddedHeight || kernelWidth > paddedWidth)
        throw new ArgumentException(
            $"Kernel size ({kernelHeight}x{kernelWidth}) must not exceed padded input size ({paddedHeight}x{paddedWidth})");

    // Calculate output dimensions
    int outHeight = (paddedHeight - kernelHeight) / strideH + 1;
    int outWidth = (paddedWidth - kernelWidth) / strideW + 1;
    int outChannels = inChannels * multiplier;

    // Validate bias if provided
    if (bias != null)
    {
        var biasShape = bias.Value.Shape;
        if (biasShape.Length != 1 || biasShape[0] != outChannels)
            throw new ArgumentException($"Bias must be 1D tensor of length {outChannels}");
    }

    var outputShape = new int[] { batch, outChannels, outHeight, outWidth };
    var result = new Tensor<T>(outputShape);

    // Forward pass: for each input channel ic, apply 'multiplier' filters to produce
    // 'multiplier' consecutive output channels.
    for (int b = 0; b < batch; b++)
    {
        for (int ic = 0; ic < inChannels; ic++)
        {
            for (int m = 0; m < multiplier; m++)
            {
                int oc = ic * multiplier + m; // Output channel index
                for (int oh = 0; oh < outHeight; oh++)
                {
                    for (int ow = 0; ow < outWidth; ow++)
                    {
                        T sum = numOps.Zero;

                        for (int kh = 0; kh < kernelHeight; kh++)
                        {
                            // Rows inside the padding region contribute zero; skip the whole row.
                            int ih = oh * strideH + kh - padH;
                            if (ih < 0 || ih >= inHeight)
                                continue;

                            for (int kw = 0; kw < kernelWidth; kw++)
                            {
                                int iw = ow * strideW + kw - padW;
                                if (iw < 0 || iw >= inWidth)
                                    continue;

                                T inputVal = input.Value[b, ic, ih, iw];
                                T kernelVal = kernel.Value[ic, m, kh, kw];
                                sum = numOps.Add(sum, numOps.Multiply(inputVal, kernelVal));
                            }
                        }

                        // Add bias if provided
                        if (bias != null)
                            sum = numOps.Add(sum, bias.Value[oc]);

                        result[b, oc, oh, ow] = sum;
                    }
                }
            }
        }
    }

    void BackwardFunction(Tensor<T> gradient)
    {
        // Resolve (and lazily allocate) the gradient buffers of the parents that need one.
        // A null local means "this parent does not require a gradient".
        Tensor<T>? inputGrad = null;
        if (input.RequiresGradient)
        {
            input.Gradient ??= new Tensor<T>(inputShape);
            inputGrad = input.Gradient;
        }

        Tensor<T>? kernelGrad = null;
        if (kernel.RequiresGradient)
        {
            kernel.Gradient ??= new Tensor<T>(kernelShape);
            kernelGrad = kernel.Gradient;
        }

        Tensor<T>? biasGrad = null;
        if (bias != null && bias.RequiresGradient)
        {
            bias.Gradient ??= new Tensor<T>(new int[] { outChannels });
            biasGrad = bias.Gradient;
        }

        if (inputGrad == null && kernelGrad == null && biasGrad == null)
            return;

        bool needsWindowPass = inputGrad != null || kernelGrad != null;

        // Single traversal over the output gradient. The loop order matches the forward pass,
        // so every gradient element receives its contributions in the same order as it would
        // with one separate pass per parent.
        for (int b = 0; b < batch; b++)
        {
            for (int ic = 0; ic < inChannels; ic++)
            {
                for (int m = 0; m < multiplier; m++)
                {
                    int oc = ic * multiplier + m;
                    for (int oh = 0; oh < outHeight; oh++)
                    {
                        for (int ow = 0; ow < outWidth; ow++)
                        {
                            T grad = gradient[b, oc, oh, ow];

                            // d(out)/d(bias[oc]) is 1 at every output position of channel oc.
                            if (biasGrad != null)
                                biasGrad[oc] = numOps.Add(biasGrad[oc], grad);

                            if (!needsWindowPass)
                                continue;

                            for (int kh = 0; kh < kernelHeight; kh++)
                            {
                                int ih = oh * strideH + kh - padH;
                                if (ih < 0 || ih >= inHeight)
                                    continue;

                                for (int kw = 0; kw < kernelWidth; kw++)
                                {
                                    int iw = ow * strideW + kw - padW;
                                    if (iw < 0 || iw >= inWidth)
                                        continue;

                                    // Gradient w.r.t. input: upstream gradient times the kernel weight.
                                    if (inputGrad != null)
                                    {
                                        T kernelVal = kernel.Value[ic, m, kh, kw];
                                        inputGrad[b, ic, ih, iw] = numOps.Add(
                                            inputGrad[b, ic, ih, iw], numOps.Multiply(grad, kernelVal));
                                    }

                                    // Gradient w.r.t. kernel: upstream gradient times the input value.
                                    if (kernelGrad != null)
                                    {
                                        T inputVal = input.Value[b, ic, ih, iw];
                                        kernelGrad[ic, m, kh, kw] = numOps.Add(
                                            kernelGrad[ic, m, kh, kw], numOps.Multiply(grad, inputVal));
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    var parents = bias != null
        ? new List<ComputationNode<T>> { input, kernel, bias }
        : new List<ComputationNode<T>> { input, kernel };

    var node = new ComputationNode<T>(
        value: result,
        requiresGradient: input.RequiresGradient || kernel.RequiresGradient || (bias?.RequiresGradient ?? false),
        parents: parents,
        backwardFunction: BackwardFunction,
        name: null);

    // Register the node with the active tape (if any) so it takes part in backpropagation.
    var tape = GradientTape<T>.Current;
    if (tape != null && tape.IsRecording)
        tape.RecordOperation(node);

    return node;
}
38464099}
38474100}
0 commit comments