|
4 | 4 | import net.echo.brain4j.activation.Activations; |
5 | 5 | import net.echo.brain4j.convolution.Kernel; |
6 | 6 | import net.echo.brain4j.layer.Layer; |
7 | | -import net.echo.brain4j.layer.impl.DenseLayer; |
8 | 7 | import net.echo.brain4j.structure.Neuron; |
9 | 8 | import net.echo.brain4j.structure.cache.Parameters; |
10 | 9 | import net.echo.brain4j.structure.cache.StatesCache; |
11 | 10 | import net.echo.brain4j.training.optimizers.Optimizer; |
12 | 11 | import net.echo.brain4j.training.updater.Updater; |
13 | | -import org.checkerframework.checker.units.qual.K; |
14 | 12 |
|
15 | 13 | import java.util.ArrayList; |
16 | 14 | import java.util.List; |
@@ -130,50 +128,87 @@ public Kernel forward(StatesCache cache, Layer<?, ?> lastLayer, Kernel input) { |
130 | 128 | public void propagate(StatesCache cache, Layer<?, ?> nextLayer, Updater updater, Optimizer optimizer) { |
131 | 129 | Kernel featureMap = cache.getFeatureMap(this); |
132 | 130 |
|
133 | | - if (nextLayer instanceof FlattenLayer) { |
134 | | - List<Neuron> neurons = nextLayer.getNeurons(); |
135 | | - Kernel deltaKernel = new Kernel(featureMap.getWidth(), featureMap.getHeight()); |
| 131 | + switch (nextLayer) { |
| 132 | + case FlattenLayer flattenLayer -> { |
| 133 | + List<Neuron> neurons = nextLayer.getNeurons(); |
| 134 | + Kernel deltaKernel = new Kernel(featureMap.getWidth(), featureMap.getHeight()); |
136 | 135 |
|
137 | | - for (int h = 0; h < featureMap.getHeight(); h++) { |
138 | | - for (int w = 0; w < featureMap.getWidth(); w++) { |
139 | | - int index = h * featureMap.getWidth() + w; |
140 | | - double deltaNeuron = neurons.get(index).getDelta(cache); |
| 136 | + for (int h = 0; h < featureMap.getHeight(); h++) { |
| 137 | + for (int w = 0; w < featureMap.getWidth(); w++) { |
| 138 | + int index = h * featureMap.getWidth() + w; |
| 139 | + double deltaNeuron = neurons.get(index).getDelta(cache); |
141 | 140 |
|
142 | | - double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h)); |
143 | | - double localDelta = deltaNeuron * derivative; |
| 141 | + double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h)); |
| 142 | + double localDelta = deltaNeuron * derivative; |
144 | 143 |
|
145 | | - deltaKernel.setValue(w, h, localDelta); |
| 144 | + deltaKernel.setValue(w, h, localDelta); |
| 145 | + } |
146 | 146 | } |
| 147 | + |
| 148 | + updateParameters(cache, optimizer, deltaKernel); |
147 | 149 | } |
| 150 | + case ConvLayer nextConvLayer -> { |
| 151 | + Kernel deltaNext = cache.getDelta(nextConvLayer); |
| 152 | + Kernel deltaCurrent = new Kernel(featureMap.getWidth(), featureMap.getHeight()); |
148 | 153 |
|
149 | | - updateParameters(cache, optimizer, deltaKernel); |
150 | | - } else if (nextLayer instanceof ConvLayer nextConvLayer) { |
151 | | - Kernel deltaNext = cache.getDelta(nextConvLayer); |
152 | | - Kernel deltaCurrent = new Kernel(featureMap.getWidth(), featureMap.getHeight()); |
| 154 | + for (Kernel nextKernel : nextConvLayer.getKernels()) { |
| 155 | + Kernel rotatedKernel = nextKernel.rotate180(); |
| 156 | + Kernel contribution = deltaNext.convolute(rotatedKernel, 1); |
153 | 157 |
|
154 | | - for (Kernel nextKernel : nextConvLayer.getKernels()) { |
155 | | - Kernel rotatedKernel = nextKernel.rotate180(); |
156 | | - Kernel contribution = deltaNext.convolute(rotatedKernel, 1); |
| 158 | + if (contribution.getWidth() != deltaCurrent.getWidth() || contribution.getHeight() != deltaCurrent.getHeight()) { |
| 159 | + contribution = cropTo(contribution, deltaCurrent.getWidth(), deltaCurrent.getHeight()); |
| 160 | + } |
157 | 161 |
|
158 | | - if (contribution.getWidth() != deltaCurrent.getWidth() || contribution.getHeight() != deltaCurrent.getHeight()) { |
159 | | - contribution = cropTo(contribution, deltaCurrent.getWidth(), deltaCurrent.getHeight()); |
| 162 | + deltaCurrent.add(contribution); |
160 | 163 | } |
161 | 164 |
|
162 | | - deltaCurrent.add(contribution); |
163 | | - } |
164 | | - |
165 | | - for (int h = 0; h < deltaCurrent.getHeight(); h++) { |
166 | | - for (int w = 0; w < deltaCurrent.getWidth(); w++) { |
167 | | - double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h)); |
168 | | - double updatedDelta = clipGradient(deltaCurrent.getValue(w, h) * derivative); |
| 165 | + for (int h = 0; h < deltaCurrent.getHeight(); h++) { |
| 166 | + for (int w = 0; w < deltaCurrent.getWidth(); w++) { |
| 167 | + double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h)); |
| 168 | + double updatedDelta = clipGradient(deltaCurrent.getValue(w, h) * derivative); |
169 | 169 |
|
170 | | - deltaCurrent.setValue(w, h, updatedDelta); |
| 170 | + deltaCurrent.setValue(w, h, updatedDelta); |
| 171 | + } |
171 | 172 | } |
| 173 | + |
| 174 | + updateParameters(cache, optimizer, deltaCurrent); |
172 | 175 | } |
| 176 | + case PoolingLayer poolingLayer -> { |
| 177 | + Kernel deltaPooling = cache.getDelta(poolingLayer); |
| 178 | + Kernel deltaUnpooled = new Kernel(featureMap.getWidth(), featureMap.getHeight()); |
| 179 | + |
| 180 | + int poolWidth = poolingLayer.getKernelWidth(); |
| 181 | + int poolHeight = poolingLayer.getKernelHeight(); |
| 182 | + int poolStride = poolingLayer.getStride(); |
| 183 | + |
| 184 | + for (int ph = 0; ph < deltaPooling.getHeight(); ph++) { |
| 185 | + for (int pw = 0; pw < deltaPooling.getWidth(); pw++) { |
| 186 | + double deltaVal = deltaPooling.getValue(pw, ph); |
| 187 | + |
| 188 | + int startX = pw * poolStride; |
| 189 | + int startY = ph * poolStride; |
| 190 | + |
| 191 | + for (int y = startY; y < startY + poolHeight && y < featureMap.getHeight(); y++) { |
| 192 | + for (int x = startX; x < startX + poolWidth && x < featureMap.getWidth(); x++) { |
| 193 | + double current = deltaUnpooled.getValue(x, y); |
| 194 | + current += deltaVal / (poolWidth * poolHeight); |
| 195 | + deltaUnpooled.setValue(x, y, current); |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | + } |
| 200 | + |
| 201 | + for (int h = 0; h < deltaUnpooled.getHeight(); h++) { |
| 202 | + for (int w = 0; w < deltaUnpooled.getWidth(); w++) { |
| 203 | + double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h)); |
| 204 | + double updatedDelta = clipGradient(deltaUnpooled.getValue(w, h) * derivative); |
| 205 | + deltaUnpooled.setValue(w, h, updatedDelta); |
| 206 | + } |
| 207 | + } |
173 | 208 |
|
174 | | - updateParameters(cache, optimizer, deltaCurrent); |
175 | | - } else { |
176 | | - throw new UnsupportedOperationException("Propagation not support for " + nextLayer.getClass().getSimpleName()); |
| 209 | + updateParameters(cache, optimizer, deltaUnpooled); |
| 210 | + } |
| 211 | + default -> throw new UnsupportedOperationException("Propagation not support for " + nextLayer.getClass().getSimpleName()); |
177 | 212 | } |
178 | 213 | } |
179 | 214 |
|
|
0 commit comments