Skip to content

Commit 209d58c

Browse files
committed
Add backprop implementation, still needs work
1 parent f276368 commit 209d58c

File tree

6 files changed

+78
-17
lines changed

6 files changed

+78
-17
lines changed

SkalaNet/build.sbt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
// import scala.scalanative.build.*

scalaVersion := "3.1.3"

// Scala Native build settings, currently disabled; re-enable by
// uncommenting this block and the import above.
/*
enablePlugins(ScalaNativePlugin)

nativeConfig ~= {
  _.withLTO(LTO.thin)
    .withMode(Mode.releaseFull)
    .withGC(GC.commix)
}
*/

SkalaNet/src/main/scala/Image.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ object Image:
2020

2121
// One label byte per image, after an 8-byte file header.
private def readLabels(labelFile: String): Seq[Int] = readBytes(labelFile).drop(8).map(_.toInt)

// Reads the image file (16-byte header, then 28x28 pixels per image, one
// byte per pixel) and pairs each image with its label from the label file.
def readImages(imageFile: String, labelFile: String): IndexedSeq[Image] =
  val labels = readLabels(labelFile)
  val pixelStream = readBytes(imageFile)
    .drop(16)
    .map(_.toInt & 255) // convert to unsigned "byte" by masking with 0b11111111
  pixelStream
    .grouped(28 * 28)            // one chunk per image
    .map(_.grouped(28).toArray)  // rows of 28 pixels
    .zip(labels)
    .map((pixels, label) => Image(pixels = pixels, label = label))
    .toIndexedSeq

SkalaNet/src/main/scala/Main.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def tryNetwork() =
2929

3030
// Runs stochastic gradient descent over the training images,
// feeding each image to the network as a column vector.
def trainNetwork() = nn.SGD(
  trainingData = trainingImages.map(img => (img.toColumnVector(), img.label)),
  epochs = 2,
  batchSize = 100
)
3535

SkalaNet/src/main/scala/Matrix.scala

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
package SkalaNet
22

33
import SkalaNet.Types.*
4-
import scalanative.unsafe.*
4+
/*import scalanative.unsafe.*
55
66
@extern
77
def mult(
88
n: CInt, m: CInt, p: CInt,
99
A: Ptr[CFloat], B: Ptr[CFloat],
1010
res: Ptr[CFloat]
1111
): Unit = extern
12-
12+
*/
1313
extension (M: Matrix)
1414

1515
def rows: Int = M.size
@@ -29,13 +29,8 @@ extension (M: Matrix)
2929
// Element-wise matrix subtraction; both operands must share dimensions.
def -(other: Matrix): Matrix =
  assert(rows == other.rows && cols == other.cols, "Matrix dimensions do not match!")

  val diff = Array.ofDim[Float](rows, cols)
  for i <- 0 until rows do
    for j <- 0 until cols do
      diff(i)(j) = M(i)(j) - other(i)(j)
  diff
33+
/*
3934
def *(other: Matrix): Matrix =
4035
assert(cols == other.rows, "Dimensions are not valid for multiplication!")
4136
@@ -60,9 +55,28 @@ extension (M: Matrix)
6055
newM(i)(j) = !(res + i * p + j)
6156
6257
newM
63-
58+
*/
59+
60+
// Matrix multiplication (naive triple loop): (n x m) * (m x p) -> (n x p).
// BUG FIX: the inner loop previously ASSIGNED `res(i)(j) = ...` on every k,
// so each cell kept only the last term instead of accumulating the full
// dot product. It must add (`+=`) across k.
def *(other: Matrix): Matrix =
  assert(cols == other.rows)
  val (n, m, p) = (rows, cols, other.cols)
  val res = Array.ofDim[Float](n, p) // zero-initialized, safe to accumulate into
  for i <- 0 until n do
    for j <- 0 until p do
      for k <- 0 until m do
        res(i)(j) += M(i)(k) * other(k)(j)
  res
69+
6470
// Scalar multiplication: scales every entry by c.
def *(c: Float): Matrix =
  M.map(_.map(z => c * z))

// Element-wise (Hadamard) product of two same-shaped matrices.
// NOTE(review): this method's name was lost in extraction (`def (other: ...)`),
// most likely a non-ASCII operator symbol; restored here as `⊙` to match the
// garbled infix call sites in NeuralNetwork.backprop — confirm against the
// original source.
def ⊙(other: Matrix): Matrix =
  assert(rows == other.rows && cols == other.cols, "Matrix dimensions differ!")
  val res = Array.ofDim[Float](rows, cols)
  for i <- 0 until rows do
    for j <- 0 until cols do
      res(i)(j) = M(i)(j) * other(i)(j)
  res
6680

6781
object Matrix:
6882

SkalaNet/src/main/scala/NeuralNetwork.scala

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package SkalaNet
22

33
import SkalaNet.Types.*
4+
import collection.mutable.ArrayBuffer
5+
import Utils.zip
46

57
extension (x: Float)
68
def **(y: Int): Float =
@@ -14,15 +16,20 @@ case class NeuralNetwork private (private val layerSizes: Seq[Int]):
1416
// ReLU ;) — rectified linear unit applied to every entry.
private def __/(m: Matrix): Matrix = m.map(_.map(z => math.max(z, 0)))

// ReLU derivative: 1 where the pre-activation is positive, 0 elsewhere.
private def reluPrime(m: Matrix): Matrix = m.map(_.map(z => if z > 0 then 1 else 0))

// Pushes the input column vector through every layer, applying the affine
// transform followed by ReLU at each step (the output layer included).
private def feedforward(inp: Matrix): Matrix =
  var activation = inp
  for (w, b) <- weights.zip(biases) do
    activation = __/(w * activation + b)
  activation
1923

24+
// Gradient of the squared-error cost wrt the output activations: 2 * (a - y).
private def costPrime(output: Matrix, expectedOutput: Matrix): Matrix =
  (output - expectedOutput) * 2

// query the network using a matrix representing the image; returns the index
// of the highest-scoring output neuron (tuple ordering breaks score ties in
// favour of the larger index).
def apply(inp: Matrix): Int =
  val scored = feedforward(inp).flatten.zipWithIndex
  scored.max._2
2330

2431
// perform stochastic gradient descent
25-
def SGD(trainingData: Seq[(Matrix, Int)], epochs: Int, batchSize: Int): Unit =
32+
def SGD(trainingData: IndexedSeq[(Matrix, Int)], epochs: Int, batchSize: Int): Unit =
2633
import util.Random.shuffle
2734
val n = trainingData.size
2835
for epoch <- 1 to epochs do
@@ -43,10 +50,32 @@ case class NeuralNetwork private (private val layerSizes: Seq[Int]):
4350
// NOTE(review): if `len` (declared outside this hunk) is an integral type,
// `1 / len` is integer division and truncates to 0, making both updates
// no-ops — confirm `len` is a Float. Also note there is no explicit
// learning-rate factor here; presumably intentional for now.
weights = weights.zip(nablaW).map((w, nw) => w - nw * (1 / len))
4451
biases = biases.zip(nablaB).map((b, nb) => b - nb * (1 / len))
4552

46-
private def backprop(inp: Matrix, expectedAns: Int): (Seq[Matrix], Seq[Matrix]) = ???
53+
// Backpropagation for a single training example.
// Returns the per-layer gradients as (weight gradients, bias gradients),
// ordered from the first layer to the last, matching `weights`/`biases`.
private def backprop(inp: Matrix, expectedAns: Int): (Seq[Matrix], Seq[Matrix]) =
  val deltaW = ArrayBuffer[Matrix]()
  val deltaB = ArrayBuffer[Matrix]()

  // Forward pass, recording every pre-activation z and activation a
  // (as(0) is the input itself).
  val zs = ArrayBuffer[Matrix]()
  val as = ArrayBuffer[Matrix](inp)
  weights.zip(biases).foldLeft(inp) { case (x, (w, b)) =>
    val z = w * x + b
    zs.append(z)
    val a = __/(z)
    as.append(a)
    a
  }

  // One-hot column vector for the expected answer. Generalized from the
  // previous hard-coded 10 to the actual output-layer size.
  val expectedOutput = Array.ofDim[Float](layerSizes.last, 1)
  expectedOutput(expectedAns)(0) = 1f

  // Output layer: delta = dC/da ⊙ relu'(z).
  // NOTE(review): the element-wise product operator was garbled away in
  // extraction; `⊙` is assumed here — confirm against the original source.
  var delta = costPrime(as.last, expectedOutput) ⊙ reluPrime(zs.last)
  deltaW.append(delta * as.init.last.transpose)
  deltaB.append(delta)

  // Hidden layers, last to first: delta_l = (W_{l+1}^T delta_{l+1}) ⊙ relu'(z_l),
  // with dW_l = delta_l * a_{l-1}^T.
  for (w_next, z, a_prev) <- zip(weights.tail, zs.init, as.init.init).reverse do
    delta = (w_next.transpose * delta) ⊙ reluPrime(z)
    deltaB.append(delta)
    deltaW.append(delta * a_prev.transpose)

  // Gradients were accumulated back-to-front; reverse into layer order.
  (deltaW.reverse.toSeq, deltaB.reverse.toSeq)
5079

5180
object NeuralNetwork:
5281

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package SkalaNet

object Utils:

  /** Zips three collections element-wise, truncating to the shortest.
    *
    * BUG FIX: the previous version iterated `0 until max(sizes)` and called
    * `next` unconditionally, which (a) threw NoSuchElementException whenever
    * the inputs had unequal lengths — standard `zip` truncates to the
    * shortest instead — and (b) called `.size` on the `IterableOnce`
    * arguments after already taking their iterators, double-consuming
    * one-shot inputs such as `Iterator`.
    */
  def zip[A, B, C](l1: IterableOnce[A], l2: IterableOnce[B], l3: IterableOnce[C]): Seq[(A, B, C)] =
    val (i1, i2, i3) = (l1.iterator, l2.iterator, l3.iterator)
    val out = Seq.newBuilder[(A, B, C)]
    while i1.hasNext && i2.hasNext && i3.hasNext do
      out += ((i1.next(), i2.next(), i3.next()))
    out.result()

0 commit comments

Comments
 (0)