Skip to content

Commit 86e685a

Browse files
committed
More work on the pooling layer
1 parent a49fb48 commit 86e685a

File tree

7 files changed

+187
-45
lines changed

7 files changed

+187
-45
lines changed

src/main/java/net/echo/brain4j/convolution/pooling/PoolingFunction.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@
66
public interface PoolingFunction {
77

88
double apply(PoolingLayer layer, Kernel input, int i, int j);
9+
10+
void unpool(PoolingLayer layer, int outX, int outY, Kernel deltaPooling, Kernel deltaUnpooled, Kernel input);
911
}

src/main/java/net/echo/brain4j/convolution/pooling/impl/AveragePooling.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,25 @@ public double apply(PoolingLayer layer, Kernel input, int i, int j) {
3131

3232
return sum / count;
3333
}
34+
35+
@Override
36+
public void unpool(PoolingLayer layer, int outX, int outY, Kernel deltaPooling, Kernel deltaUnpooled, Kernel input) {
37+
double deltaVal = deltaPooling.getValue(outX, outY);
38+
39+
int startX = outX * layer.getStride();
40+
int startY = outY * layer.getStride();
41+
42+
int endX = Math.min(startX + layer.getKernelWidth(), input.getWidth());
43+
int endY = Math.min(startY + layer.getKernelHeight(), input.getHeight());
44+
45+
int poolArea = (endX - startX) * (endY - startY);
46+
double distributedDelta = deltaVal / poolArea;
47+
48+
for (int y = startY; y < endY; y++) {
49+
for (int x = startX; x < endX; x++) {
50+
double current = deltaUnpooled.getValue(x, y);
51+
deltaUnpooled.setValue(x, y, current + distributedDelta);
52+
}
53+
}
54+
}
3455
}

src/main/java/net/echo/brain4j/convolution/pooling/impl/MaxPooling.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,33 @@ public double apply(PoolingLayer layer, Kernel input, int i, int j) {
3030

3131
return pooledValue;
3232
}
33+
34+
@Override
35+
public void unpool(PoolingLayer layer, int outX, int outY, Kernel deltaPooling, Kernel deltaUnpooled, Kernel input) {
36+
double deltaVal = deltaPooling.getValue(outX, outY);
37+
double maxVal = Double.NEGATIVE_INFINITY;
38+
39+
int startX = outX * layer.getStride();
40+
int startY = outY * layer.getStride();
41+
42+
int endX = Math.min(startX + layer.getKernelWidth(), input.getWidth());
43+
int endY = Math.min(startY + layer.getKernelHeight(), input.getHeight());
44+
45+
int maxX = startX, maxY = startY;
46+
47+
for (int y = startY; y < endY; y++) {
48+
for (int x = startX; x < endX; x++) {
49+
double val = input.getValue(x, y);
50+
51+
if (val > maxVal) {
52+
maxVal = val;
53+
maxX = x;
54+
maxY = y;
55+
}
56+
}
57+
}
58+
59+
double current = deltaUnpooled.getValue(maxX, maxY);
60+
deltaUnpooled.setValue(maxX, maxY, current + deltaVal);
61+
}
3362
}

src/main/java/net/echo/brain4j/layer/impl/convolution/ConvLayer.java

Lines changed: 67 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@
44
import net.echo.brain4j.activation.Activations;
55
import net.echo.brain4j.convolution.Kernel;
66
import net.echo.brain4j.layer.Layer;
7-
import net.echo.brain4j.layer.impl.DenseLayer;
87
import net.echo.brain4j.structure.Neuron;
98
import net.echo.brain4j.structure.cache.Parameters;
109
import net.echo.brain4j.structure.cache.StatesCache;
1110
import net.echo.brain4j.training.optimizers.Optimizer;
1211
import net.echo.brain4j.training.updater.Updater;
13-
import org.checkerframework.checker.units.qual.K;
1412

1513
import java.util.ArrayList;
1614
import java.util.List;
@@ -130,50 +128,87 @@ public Kernel forward(StatesCache cache, Layer<?, ?> lastLayer, Kernel input) {
130128
public void propagate(StatesCache cache, Layer<?, ?> nextLayer, Updater updater, Optimizer optimizer) {
131129
Kernel featureMap = cache.getFeatureMap(this);
132130

133-
if (nextLayer instanceof FlattenLayer) {
134-
List<Neuron> neurons = nextLayer.getNeurons();
135-
Kernel deltaKernel = new Kernel(featureMap.getWidth(), featureMap.getHeight());
131+
switch (nextLayer) {
132+
case FlattenLayer flattenLayer -> {
133+
List<Neuron> neurons = nextLayer.getNeurons();
134+
Kernel deltaKernel = new Kernel(featureMap.getWidth(), featureMap.getHeight());
136135

137-
for (int h = 0; h < featureMap.getHeight(); h++) {
138-
for (int w = 0; w < featureMap.getWidth(); w++) {
139-
int index = h * featureMap.getWidth() + w;
140-
double deltaNeuron = neurons.get(index).getDelta(cache);
136+
for (int h = 0; h < featureMap.getHeight(); h++) {
137+
for (int w = 0; w < featureMap.getWidth(); w++) {
138+
int index = h * featureMap.getWidth() + w;
139+
double deltaNeuron = neurons.get(index).getDelta(cache);
141140

142-
double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h));
143-
double localDelta = deltaNeuron * derivative;
141+
double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h));
142+
double localDelta = deltaNeuron * derivative;
144143

145-
deltaKernel.setValue(w, h, localDelta);
144+
deltaKernel.setValue(w, h, localDelta);
145+
}
146146
}
147+
148+
updateParameters(cache, optimizer, deltaKernel);
147149
}
150+
case ConvLayer nextConvLayer -> {
151+
Kernel deltaNext = cache.getDelta(nextConvLayer);
152+
Kernel deltaCurrent = new Kernel(featureMap.getWidth(), featureMap.getHeight());
148153

149-
updateParameters(cache, optimizer, deltaKernel);
150-
} else if (nextLayer instanceof ConvLayer nextConvLayer) {
151-
Kernel deltaNext = cache.getDelta(nextConvLayer);
152-
Kernel deltaCurrent = new Kernel(featureMap.getWidth(), featureMap.getHeight());
154+
for (Kernel nextKernel : nextConvLayer.getKernels()) {
155+
Kernel rotatedKernel = nextKernel.rotate180();
156+
Kernel contribution = deltaNext.convolute(rotatedKernel, 1);
153157

154-
for (Kernel nextKernel : nextConvLayer.getKernels()) {
155-
Kernel rotatedKernel = nextKernel.rotate180();
156-
Kernel contribution = deltaNext.convolute(rotatedKernel, 1);
158+
if (contribution.getWidth() != deltaCurrent.getWidth() || contribution.getHeight() != deltaCurrent.getHeight()) {
159+
contribution = cropTo(contribution, deltaCurrent.getWidth(), deltaCurrent.getHeight());
160+
}
157161

158-
if (contribution.getWidth() != deltaCurrent.getWidth() || contribution.getHeight() != deltaCurrent.getHeight()) {
159-
contribution = cropTo(contribution, deltaCurrent.getWidth(), deltaCurrent.getHeight());
162+
deltaCurrent.add(contribution);
160163
}
161164

162-
deltaCurrent.add(contribution);
163-
}
164-
165-
for (int h = 0; h < deltaCurrent.getHeight(); h++) {
166-
for (int w = 0; w < deltaCurrent.getWidth(); w++) {
167-
double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h));
168-
double updatedDelta = clipGradient(deltaCurrent.getValue(w, h) * derivative);
165+
for (int h = 0; h < deltaCurrent.getHeight(); h++) {
166+
for (int w = 0; w < deltaCurrent.getWidth(); w++) {
167+
double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h));
168+
double updatedDelta = clipGradient(deltaCurrent.getValue(w, h) * derivative);
169169

170-
deltaCurrent.setValue(w, h, updatedDelta);
170+
deltaCurrent.setValue(w, h, updatedDelta);
171+
}
171172
}
173+
174+
updateParameters(cache, optimizer, deltaCurrent);
172175
}
176+
case PoolingLayer poolingLayer -> {
177+
Kernel deltaPooling = cache.getDelta(poolingLayer);
178+
Kernel deltaUnpooled = new Kernel(featureMap.getWidth(), featureMap.getHeight());
179+
180+
int poolWidth = poolingLayer.getKernelWidth();
181+
int poolHeight = poolingLayer.getKernelHeight();
182+
int poolStride = poolingLayer.getStride();
183+
184+
for (int ph = 0; ph < deltaPooling.getHeight(); ph++) {
185+
for (int pw = 0; pw < deltaPooling.getWidth(); pw++) {
186+
double deltaVal = deltaPooling.getValue(pw, ph);
187+
188+
int startX = pw * poolStride;
189+
int startY = ph * poolStride;
190+
191+
for (int y = startY; y < startY + poolHeight && y < featureMap.getHeight(); y++) {
192+
for (int x = startX; x < startX + poolWidth && x < featureMap.getWidth(); x++) {
193+
double current = deltaUnpooled.getValue(x, y);
194+
current += deltaVal / (poolWidth * poolHeight);
195+
deltaUnpooled.setValue(x, y, current);
196+
}
197+
}
198+
}
199+
}
200+
201+
for (int h = 0; h < deltaUnpooled.getHeight(); h++) {
202+
for (int w = 0; w < deltaUnpooled.getWidth(); w++) {
203+
double derivative = activation.getFunction().getDerivative(featureMap.getValue(w, h));
204+
double updatedDelta = clipGradient(deltaUnpooled.getValue(w, h) * derivative);
205+
deltaUnpooled.setValue(w, h, updatedDelta);
206+
}
207+
}
173208

174-
updateParameters(cache, optimizer, deltaCurrent);
175-
} else {
176-
throw new UnsupportedOperationException("Propagation not support for " + nextLayer.getClass().getSimpleName());
209+
updateParameters(cache, optimizer, deltaUnpooled);
210+
}
211+
default -> throw new UnsupportedOperationException("Propagation not support for " + nextLayer.getClass().getSimpleName());
177212
}
178213
}
179214

src/main/java/net/echo/brain4j/layer/impl/convolution/PoolingLayer.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
import net.echo.brain4j.convolution.Kernel;
66
import net.echo.brain4j.convolution.pooling.PoolingType;
77
import net.echo.brain4j.layer.Layer;
8+
import net.echo.brain4j.structure.Neuron;
9+
import net.echo.brain4j.structure.cache.Parameters;
810
import net.echo.brain4j.structure.cache.StatesCache;
11+
import net.echo.brain4j.training.optimizers.Optimizer;
12+
import net.echo.brain4j.training.updater.Updater;
913

1014
public class PoolingLayer extends Layer<Kernel, Kernel> {
1115

@@ -25,6 +29,7 @@ public PoolingLayer(PoolingType poolingType, int kernelWidth, int kernelHeight,
2529

2630
public PoolingLayer(PoolingType poolingType, int kernelWidth, int kernelHeight, int stride, int padding) {
2731
super(kernelWidth * kernelHeight, Activations.LINEAR);
32+
this.id = Parameters.TOTAL_CONV_LAYER++;
2833
this.poolingType = poolingType;
2934
this.kernelHeight = kernelHeight;
3035
this.kernelWidth = kernelWidth;
@@ -56,8 +61,56 @@ public Kernel forward(StatesCache cache, Layer<?, ?> lastLayer, Kernel input) {
5661
}
5762
}
5863

64+
cache.setFeatureMap(this, output);
65+
cache.setInput(this, input);
66+
5967
return output;
6068
}
69+
@Override
70+
public void propagate(StatesCache cache, Layer<?, ?> nextLayer, Updater updater, Optimizer optimizer) {
71+
System.out.println("Layer id: " + id);
72+
Kernel output = cache.getFeatureMap(this);
73+
Kernel input = cache.getInput(this);
74+
75+
Kernel deltaPooling = new Kernel(output.getWidth(), output.getHeight());
76+
77+
if (nextLayer instanceof ConvLayer convLayer) {
78+
System.out.println("Getting pooling from conv");
79+
deltaPooling = cache.getDelta(convLayer);
80+
} else if (nextLayer instanceof FlattenLayer flattenLayer) {
81+
int outW = output.getWidth();
82+
int outH = output.getHeight();
83+
84+
for (int h = 0; h < outH; h++) {
85+
for (int w = 0; w < outW; w++) {
86+
int index = h * outW + w;
87+
88+
Neuron neuron = flattenLayer.getNeuronAt(index);
89+
double neuronDelta = neuron.getDelta(cache);
90+
91+
deltaPooling.setValue(w, h, neuronDelta);
92+
}
93+
}
94+
} else {
95+
throw new UnsupportedOperationException("Unsupported layer after pooling layer!");
96+
}
97+
98+
Kernel deltaUnpooled = new Kernel(input.getWidth(), input.getHeight());
99+
100+
System.out.println("OutX: " + output.getWidth());
101+
System.out.println("OutY: " + output.getHeight());
102+
103+
System.out.println("DX: " + deltaPooling.getWidth());
104+
System.out.println("DY: " + deltaPooling.getHeight());
105+
for (int outY = 0; outY < output.getHeight(); outY++) {
106+
for (int outX = 0; outX < output.getWidth(); outX++) {
107+
poolingType.getFunction().unpool(this, outX, outY, deltaPooling, deltaUnpooled, input);
108+
}
109+
}
110+
111+
cache.setDelta(this, deltaUnpooled);
112+
}
113+
61114

62115
public PoolingType getPoolingType() {
63116
return poolingType;

src/main/java/net/echo/brain4j/structure/cache/StatesCache.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package net.echo.brain4j.structure.cache;
22

33
import net.echo.brain4j.convolution.Kernel;
4-
import net.echo.brain4j.layer.impl.convolution.ConvLayer;
4+
import net.echo.brain4j.layer.Layer;
55
import net.echo.brain4j.structure.Neuron;
66

77
public class StatesCache {
@@ -15,32 +15,33 @@ public class StatesCache {
1515
public StatesCache() {
    // Flat per-neuron caches sized over the whole network.
    this.valuesCache = new float[Parameters.TOTAL_NEURONS];
    this.deltasCache = new float[Parameters.TOTAL_NEURONS];

    // Per-layer kernel caches, indexed by layer id; TOTAL_CONV_LAYER counts
    // every kernel-based layer that claims an id from this counter.
    this.inputMap = new Kernel[Parameters.TOTAL_CONV_LAYER];
    this.featureMaps = new Kernel[Parameters.TOTAL_CONV_LAYER];
    this.deltaMap = new Kernel[Parameters.TOTAL_CONV_LAYER];
}
2223

23-
/** Caches the raw input kernel fed to the given kernel-based layer. */
public void setInput(Layer<Kernel, Kernel> layer, Kernel input) {
    inputMap[layer.getId()] = input;
}
2627

27-
/** Returns the input kernel previously cached for the given layer, if any. */
public Kernel getInput(Layer<Kernel, Kernel> layer) {
    return inputMap[layer.getId()];
}
3031

31-
/** Caches the forward-pass output (feature map) of the given layer. */
public void setFeatureMap(Layer<Kernel, Kernel> layer, Kernel output) {
    featureMaps[layer.getId()] = output;
}
3435

35-
/** Returns the feature map previously cached for the given layer, if any. */
public Kernel getFeatureMap(Layer<Kernel, Kernel> layer) {
    return featureMaps[layer.getId()];
}
3839

39-
/** Returns the backpropagation delta previously stored for the given layer. */
public Kernel getDelta(Layer<Kernel, Kernel> layer) {
    return deltaMap[layer.getId()];
}
4243

43-
/** Stores the backpropagation delta for the given layer. */
public void setDelta(Layer<Kernel, Kernel> layer, Kernel delta) {
    deltaMap[layer.getId()] = delta;
}
4647

src/test/java/conv/ConvExample.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ private void start() throws IOException {
4242
System.out.println(model.getStats());
4343
model.fit(dataSet);
4444

45-
for (int i = 0; i < 100; i++) {
45+
for (int i = 0; i < 1; i++) {
4646
long start = System.nanoTime();
4747
model.fit(dataSet);
4848
double took = (System.nanoTime() - start) / 1e6;
@@ -79,15 +79,16 @@ private Sequential getModel() {
7979
new InputLayer(28, 28),
8080

8181
// #1 convolutional block
82-
new ConvLayer(32, 3, 3, Activations.MISH),
83-
// new PoolingLayer(PoolingType.MAX, 2, 2, 2),
82+
new ConvLayer(20, 3, 3, Activations.MISH),
83+
new PoolingLayer(PoolingType.MAX, 2, 2, 2),
8484

8585
// #2 convolutional block
8686
new ConvLayer(16, 5, 5, Activations.MISH),
87-
// new PoolingLayer(PoolingType.MAX, 2, 2, 2),
87+
88+
new PoolingLayer(PoolingType.MAX, 2, 2, 2),
8889

8990
// Flattens the feature map to a 1D vector
90-
new FlattenLayer(484), // You must find the right size by trial and error
91+
new FlattenLayer(25), // You must find the right size by trial and error
9192

9293
// Classifiers
9394
new DenseLayer(32, Activations.MISH),
@@ -102,7 +103,7 @@ private DataSet<DataRow> getDataSet() throws IOException {
102103

103104
List<String> lines = FileUtils.readLines(new File("dataset.csv"), "UTF-8");
104105

105-
int max = 1500, i = 0;
106+
int max = 1, i = 0;
106107

107108
for (String line : lines) {
108109
i++;

0 commit comments

Comments
 (0)