Actually code the custom linearization for matmul

axch · emilyfertig · axch · commit 289a43e92266 · 2023-05-16T10:30:11.000-04:00
so that we don't embarrass ourselves too much with how poorly AD does
thereon.

Co-authored-by: Emily Fertig <emilyaf@google.com>
diff --git a/lib/prelude.dx b/lib/prelude.dx
@@ -2576,6 +2576,7 @@ def tile(
     body (FullTileIx(n, tile_size, tile_ix'))
   body (CodaIx(n, coda_offset, coda_size))
 
+@noinline
 def tiled_matmul(
     x: l=>m=>Float,
     y: m=>n=>Float
@@ -2601,9 +2602,18 @@ def (**)(
     x: l=>m=>Float,
     y: m=>n=>Float
     ) -> l=>n=>Float  given (l|Ix, m|Ix, n|Ix) =
-  -- TODO(https://github.com/google-research/dex-lang/issues/1212) Replace with tiled_matmul.
   tiled_matmul(x, y)
 
+def matmul_linearization(
+    x: l=>m=>Float,
+    y: m=>n=>Float
+    ) -> _ given (l|Ix, m|Ix, n|Ix) =
+  def lin(xt: l=>m=>Float, yt: m=>n=>Float) -> _ =  -- tangent map: takes tangents (xt, yt) of (x, y)
+    x ** yt + xt ** y  -- product rule: d(x ** y) = x ** dy + dx ** y
+  (x ** y, lin)  -- pair of the primal product and its tangent map
+
+custom-linearization tiled_matmul matmul_linearization
+
 def (**.)(mat: n=>m=>Float, v: m=>Float) -> (n=>Float) given (n|Ix, m|Ix) =
   for i. vdot(mat[i], v)
 def(.**)(v: n=>Float, mat: n=>m=>Float) -> (m=>Float) given (n|Ix, m|Ix) =
diff --git a/tests/ad-tests.dx b/tests/ad-tests.dx
@@ -498,3 +498,24 @@ deriv (\x. q x 1.0) 1.0
 -- Correct derivative, based on the SomeTangent branch
 deriv (\x. q x x) 1.0
 > 3.
+
+------ Check custom linearization of matmul ------
+
+amat = for i:(Fin 100) j:(Fin 100). n_to_f $ ordinal (i, j)
+
+-- Matmul is linear in each argument, so its JVP in one argument should equal
+-- a direct matmul (this checks that the custom derivative is not too busted).
+
+def mmp'(m1:l=>m=>Float, m2:m=>n=>Float) -> l=>n=>Float given (l|Ix, m|Ix, n|Ix) =
+  jvp (\m. m1 ** m) m2 m2  -- JVP in the right operand (m1 held fixed), at point m2 with tangent m2
+
+:p mmp'(amat, amat) ~~ naive_matmul(amat, amat)
+> True
+
+-- Let's check the other orientation too
+
+def mmp''(m1:l=>m=>Float, m2:m=>n=>Float) -> l=>n=>Float given (l|Ix, m|Ix, n|Ix) =
+  jvp (\m. m ** m2) m1 m1  -- JVP in the left operand (m2 held fixed), at point m1 with tangent m1
+
+:p mmp''(amat, amat) ~~ naive_matmul(amat, amat)
+> True