@@ -36,9 +36,9 @@ module subroutine init(self, input_shape)
3636 ! Kernel of shape filters x channels x kernel_size
3737 allocate (self % kernel(self % filters, self % channels, self % kernel_size))
3838
39- ! Initialize the kernel with random values
39+ ! Initialize the kernel with random values with a normal distribution
4040 call random_normal(self % kernel)
41- self % kernel = self % kernel / self % kernel_size
41+ self % kernel = self % kernel / self % kernel_size ** 2
4242
4343 allocate (self % biases(self % filters))
4444 self % biases = 0
@@ -62,50 +62,112 @@ pure module subroutine forward(self, input)
6262 class(locally_connected_1d_layer), intent (in out ) :: self
6363 real , intent (in ) :: input(:,:)
6464 integer :: input_width, input_channels
65- integer :: i, n
65+ integer :: i, n, i_out
66+ integer :: iws, iwe
67+ integer :: half_window
6668
69+ ! Get input dimensions
6770 input_channels = size (input, dim= 1 )
68- input_width = size (input, dim= 2 )
71+ input_width = size (input, dim= 2 )
6972
70- do concurrent (i = 1 :self % width)
71- do concurrent (n = 1 :self % filters)
72- self % z(n,i) = sum (self % kernel(n,:,:)* input(:,i:i+ self % kernel_size-1 ))
73+ ! For a kernel of odd size, half_window = kernel_size / 2 (integer division)
74+ half_window = self % kernel_size / 2
75+
76+ ! Loop over output indices rather than input indices.
77+ do i_out = 1 , self % width
78+ ! Compute the corresponding center index in the input.
79+ i = i_out + half_window
80+
81+ ! Define the window in the input corresponding to the filter kernel
82+ iws = i - half_window
83+ iwe = i + half_window
84+
85+ ! Compute the inner tensor product (sum of element-wise products)
86+ ! for each filter across all channels and positions in the kernel.
87+ do concurrent(n = 1 :self % filters)
88+ self % z(n, i_out) = sum (self % kernel(n, :, :) * input(:, iws:iwe))
7389 end do
74- end do
7590
76- ! Add bias
77- self % z = self % z + reshape (self % biases, shape (self % z))
91+ ! Add the bias for each filter.
92+ self % z(:, i_out) = self % z(:, i_out) + self % biases
93+ end do
7894
79- ! Apply activation
95+ ! Apply the activation function to get the final output.
8096 self % output = self % activation % eval(self % z)
81-
8297 end subroutine forward
8398
99+
84100 pure module subroutine backward(self, input, gradient)
85101 implicit none
86102 class(locally_connected_1d_layer), intent (in out ) :: self
87- real , intent (in ) :: input(:,:)
88- real , intent (in ) :: gradient(:,:)
103+ real , intent (in ) :: input(:,:) ! shape: (channels, width)
104+ real , intent (in ) :: gradient(:,:) ! shape: (filters, width)
105+
106+ ! Local gradient arrays:
89107 real :: db(self % filters)
90108 real :: dw(self % filters, self % channels, self % kernel_size)
91- real :: gdz(self % filters, self % width)
109+ real :: gdz(self % filters, size (input, 2 ))
110+
92111 integer :: i, n, k
93-
94- gdz = gradient * self % activation % eval_prime(self % z)
95-
112+ integer :: input_channels, input_width
113+ integer :: istart, iend
114+ integer :: iws, iwe
115+ integer :: half_window
116+
117+ ! Get input dimensions.
118+ input_channels = size (input, dim= 1 )
119+ input_width = size (input, dim= 2 )
120+
121+ ! For an odd-sized kernel, half_window = kernel_size / 2.
122+ half_window = self % kernel_size / 2
123+
124+ ! Define the valid output range so that the full input window is available.
125+ istart = half_window + 1
126+ iend = input_width - half_window
127+
128+ !----------------------------------------------------------------------
129+ ! Compute the local gradient: gdz = (dL/dy) * sigma'(z)
130+ ! We assume self%z stores the pre-activation values from the forward pass.
131+ gdz = 0.0
132+ gdz(:, istart:iend) = gradient(:, istart:iend) * self % activation % eval_prime(self % z(:, istart:iend))
133+
134+ !----------------------------------------------------------------------
135+ ! Compute gradient with respect to biases:
136+ ! dL/db(n) = sum_{i in valid range} gdz(n, i)
96137 do concurrent (n = 1 :self % filters)
97- db(n) = sum (gdz(n,: ))
138+ db(n) = sum (gdz(n, istart:iend ))
98139 end do
99-
100- dw = 0
101- self % gradient = 0
102- do concurrent (n = 1 :self % filters, k = 1 :self % channels, i = 1 :self % width)
103- dw(n,k,:) = dw(n,k,:) + input(k, i:i+ self % kernel_size-1 ) * gdz(n, i)
140+
141+ ! Initialize weight gradient and input gradient accumulators.
142+ dw = 0.0
143+ self % gradient = 0.0 ! This array is assumed preallocated to shape (channels, width)
144+
145+ !----------------------------------------------------------------------
146+ ! Accumulate gradients over valid output positions.
147+ ! For each output position i, determine the corresponding input window indices.
148+ do concurrent (n = 1 :self % filters, &
149+ k = 1 :self % channels, &
150+ i = istart:iend)
151+ ! The input window corresponding to output index i:
152+ iws = i - half_window
153+ iwe = i + half_window
154+
155+ ! Weight gradient (dL/dw):
156+ ! For each kernel element, the contribution is the product of the input in the window
157+ ! and the local gradient at the output position i.
158+ dw(n, k, :) = dw(n, k, :) + input(k, iws:iwe) * gdz(n, i)
159+
160+ ! Input gradient (dL/dx):
161+ ! Distribute the effect of the output gradient back onto the input window,
162+ ! weighted by the kernel weights.
163+ self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, i)
104164 end do
105-
165+
166+ !----------------------------------------------------------------------
167+ ! Accumulate the computed gradients into the layer's stored gradients.
106168 self % dw = self % dw + dw
107169 self % db = self % db + db
108-
170+
109171 end subroutine backward
110172
111173 pure module function get_num_params(self) result(num_params)
0 commit comments