Skip to content

Commit 4332dc2

Browse files
committed
feat: Add LocallyConnectedConv2D operation and update LocallyConnectedLayer
LocallyConnectedConv2D implements position-specific convolution where weights are NOT shared across spatial locations. Key features: - Uses 6D weights: [out_h, out_w, out_channels, in_channels, kernel_h, kernel_w] - Each output position has its own unique set of filters - Critical for tasks where different spatial regions have fundamentally different characteristics - Common in face recognition where facial features are at specific locations Layer updates: - LocallyConnectedLayer now uses LocallyConnectedConv2D operation - Handles NHWC/NCHW format conversion between layer and operations - Converts weight format from [..., kH, kW, in_ch] to [..., in_ch, kH, kW] - Full autodiff support with parameter gradient computation Total TensorOperations: 37 (36 previous + 1 new) Layers with autodiff: 23 (22 previous + 1 new)
1 parent 1c9b79d commit 4332dc2

File tree

2 files changed

+429
-4
lines changed

2 files changed

+429
-4
lines changed

src/Autodiff/TensorOperations.cs

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4096,5 +4096,245 @@ void BackwardFunction(Tensor<T> gradient)
40964096

40974097
return node;
40984098
}
4099+
4100+
/// <summary>
/// Performs locally connected 2D convolution where weights are NOT shared across spatial locations.
/// </summary>
/// <param name="input">Input tensor of shape [batch, in_channels, height, width].</param>
/// <param name="weights">Weight tensor of shape [out_h, out_w, out_channels, in_channels, kernel_h, kernel_w].</param>
/// <param name="bias">Optional bias tensor of shape [out_channels].</param>
/// <param name="stride">Stride for the convolution as [stride_h, stride_w]; defaults to [1, 1].</param>
/// <returns>Output tensor of shape [batch, out_channels, out_h, out_w].</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> or <paramref name="weights"/> is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when the input is not 4D, the weights are not 6D, the channel counts disagree,
/// the stride is not a positive 2-element array, the bias length does not equal out_channels,
/// or the weight geometry would read past the input bounds.
/// </exception>
/// <remarks>
/// <para>
/// Locally connected convolution is like regular convolution but uses different weights for each
/// spatial output location. This increases parameters but allows position-specific feature detection.
/// </para>
/// <para>
/// Unlike Conv2D where weights are shared across all positions, LocallyConnectedConv2D uses
/// unique weights for each (h,w) output position. This is useful when different regions have
/// fundamentally different characteristics (e.g., face recognition where eyes/nose/mouth are
/// at specific locations).
/// </para>
/// <para>
/// The convolution is "valid" only: the output geometry encoded in the weight tensor must fit
/// inside the input, i.e. (out_h - 1) * stride_h + kernel_h &lt;= height (and likewise for width).
/// A geometry that would read past the input edge is rejected up front rather than silently
/// producing partial (mathematically wrong) sums at the borders.
/// </para>
/// <para>
/// Forward pass applies position-specific filters at each output location.
/// Backward pass computes gradients with respect to input, position-specific weights, and bias.
/// </para>
/// </remarks>
public static ComputationNode<T> LocallyConnectedConv2D(
    ComputationNode<T> input,
    ComputationNode<T> weights,
    ComputationNode<T>? bias = null,
    int[]? stride = null)
{
    if (input == null)
        throw new ArgumentNullException(nameof(input));
    if (weights == null)
        throw new ArgumentNullException(nameof(weights));

    var numOps = MathHelper.GetNumericOperations<T>();
    var inputShape = input.Value.Shape;
    var weightsShape = weights.Value.Shape;

    // Validate input shape (must be 4D: [batch, in_channels, height, width])
    if (inputShape.Length != 4)
        throw new ArgumentException("Input must be 4D tensor [batch, in_channels, height, width]");

    // Validate weights shape (must be 6D: [out_h, out_w, out_channels, in_channels, kernel_h, kernel_w])
    if (weightsShape.Length != 6)
        throw new ArgumentException("Weights must be 6D tensor [out_h, out_w, out_channels, in_channels, kernel_h, kernel_w]");

    // Default stride
    stride ??= new int[] { 1, 1 };
    if (stride.Length != 2)
        throw new ArgumentException("Stride must be 2D array [height, width]");
    // A zero or negative stride would revisit or never advance over input rows/columns.
    if (stride[0] <= 0 || stride[1] <= 0)
        throw new ArgumentException("Stride values must be positive");

    int batch = inputShape[0];
    int inChannels = inputShape[1];
    int inHeight = inputShape[2];
    int inWidth = inputShape[3];
    int outHeight = weightsShape[0];
    int outWidth = weightsShape[1];
    int outChannels = weightsShape[2];
    int kernelHeight = weightsShape[4];
    int kernelWidth = weightsShape[5];
    int strideH = stride[0];
    int strideW = stride[1];

    // Validate weight dimensions match input
    if (weightsShape[3] != inChannels)
        throw new ArgumentException($"Weight in_channels ({weightsShape[3]}) must match input in_channels ({inChannels})");

    // Fail fast if the weight geometry would read past the input edge. Silently skipping
    // out-of-bounds taps (the alternative) yields partial sums that are mathematically wrong.
    // With this check in place, no per-element bounds test is needed in the loops below.
    int requiredHeight = (outHeight - 1) * strideH + kernelHeight;
    int requiredWidth = (outWidth - 1) * strideW + kernelWidth;
    if (requiredHeight > inHeight || requiredWidth > inWidth)
        throw new ArgumentException(
            $"Weight geometry requires input of at least {requiredHeight}x{requiredWidth} " +
            $"but input is {inHeight}x{inWidth}");

    // Validate bias if provided
    if (bias != null)
    {
        var biasShape = bias.Value.Shape;
        if (biasShape.Length != 1 || biasShape[0] != outChannels)
            throw new ArgumentException($"Bias must be 1D tensor of length {outChannels}");
    }

    var outputShape = new int[] { batch, outChannels, outHeight, outWidth };
    var result = new Tensor<T>(outputShape);

    // Forward pass: each (oh, ow) output position applies its own private filter bank.
    for (int b = 0; b < batch; b++)
    {
        for (int oh = 0; oh < outHeight; oh++)
        {
            for (int ow = 0; ow < outWidth; ow++)
            {
                for (int oc = 0; oc < outChannels; oc++)
                {
                    T sum = numOps.Zero;

                    // Apply the position-specific filter; geometry was validated above,
                    // so every (ih, iw) access is in bounds.
                    for (int ic = 0; ic < inChannels; ic++)
                    {
                        for (int kh = 0; kh < kernelHeight; kh++)
                        {
                            for (int kw = 0; kw < kernelWidth; kw++)
                            {
                                int ih = oh * strideH + kh;
                                int iw = ow * strideW + kw;

                                T inputVal = input.Value[b, ic, ih, iw];
                                T weightVal = weights.Value[oh, ow, oc, ic, kh, kw];
                                sum = numOps.Add(sum, numOps.Multiply(inputVal, weightVal));
                            }
                        }
                    }

                    // Add bias if provided
                    if (bias != null)
                        sum = numOps.Add(sum, bias.Value[oc]);

                    result[b, oc, oh, ow] = sum;
                }
            }
        }
    }

    void BackwardFunction(Tensor<T> gradient)
    {
        // Gradient w.r.t. input: dL/dx[ih,iw] accumulates grad * weight over every
        // output position whose receptive field covers (ih, iw).
        if (input.RequiresGradient)
        {
            if (input.Gradient == null)
                input.Gradient = new Tensor<T>(inputShape);

            for (int b = 0; b < batch; b++)
            {
                for (int oh = 0; oh < outHeight; oh++)
                {
                    for (int ow = 0; ow < outWidth; ow++)
                    {
                        for (int oc = 0; oc < outChannels; oc++)
                        {
                            T grad = gradient[b, oc, oh, ow];

                            for (int ic = 0; ic < inChannels; ic++)
                            {
                                for (int kh = 0; kh < kernelHeight; kh++)
                                {
                                    for (int kw = 0; kw < kernelWidth; kw++)
                                    {
                                        int ih = oh * strideH + kh;
                                        int iw = ow * strideW + kw;

                                        T weightVal = weights.Value[oh, ow, oc, ic, kh, kw];
                                        T delta = numOps.Multiply(grad, weightVal);
                                        input.Gradient[b, ic, ih, iw] = numOps.Add(
                                            input.Gradient[b, ic, ih, iw], delta);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // Gradient w.r.t. weights: dL/dW[oh,ow,...] = grad at that position * the input
        // patch it saw (summed over the batch). No cross-position sharing, unlike Conv2D.
        if (weights.RequiresGradient)
        {
            if (weights.Gradient == null)
                weights.Gradient = new Tensor<T>(weightsShape);

            for (int b = 0; b < batch; b++)
            {
                for (int oh = 0; oh < outHeight; oh++)
                {
                    for (int ow = 0; ow < outWidth; ow++)
                    {
                        for (int oc = 0; oc < outChannels; oc++)
                        {
                            T grad = gradient[b, oc, oh, ow];

                            for (int ic = 0; ic < inChannels; ic++)
                            {
                                for (int kh = 0; kh < kernelHeight; kh++)
                                {
                                    for (int kw = 0; kw < kernelWidth; kw++)
                                    {
                                        int ih = oh * strideH + kh;
                                        int iw = ow * strideW + kw;

                                        T inputVal = input.Value[b, ic, ih, iw];
                                        T delta = numOps.Multiply(grad, inputVal);
                                        weights.Gradient[oh, ow, oc, ic, kh, kw] = numOps.Add(
                                            weights.Gradient[oh, ow, oc, ic, kh, kw], delta);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // Gradient w.r.t. bias: sum of the output gradient over batch and spatial dims.
        if (bias != null && bias.RequiresGradient)
        {
            if (bias.Gradient == null)
                bias.Gradient = new Tensor<T>(new int[] { outChannels });

            for (int b = 0; b < batch; b++)
            {
                for (int oc = 0; oc < outChannels; oc++)
                {
                    for (int oh = 0; oh < outHeight; oh++)
                    {
                        for (int ow = 0; ow < outWidth; ow++)
                        {
                            bias.Gradient[oc] = numOps.Add(bias.Gradient[oc], gradient[b, oc, oh, ow]);
                        }
                    }
                }
            }
        }
    }

    var parents = bias != null
        ? new List<ComputationNode<T>> { input, weights, bias }
        : new List<ComputationNode<T>> { input, weights };

    var node = new ComputationNode<T>(
        value: result,
        requiresGradient: input.RequiresGradient || weights.RequiresGradient || (bias?.RequiresGradient ?? false),
        parents: parents,
        backwardFunction: BackwardFunction,
        name: null);

    var tape = GradientTape<T>.Current;
    if (tape != null && tape.IsRecording)
        tape.RecordOperation(node);

    return node;
}
40994339
}
41004340
}

0 commit comments

Comments
 (0)