
Commit 5faa1ac

SiegeLordEx authored and tensorflower-gardener committed
Add support for tf.Variables in TF custom gradients.
PiperOrigin-RevId: 388328639
1 parent 9b23aef commit 5faa1ac
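
For context (not part of this commit's diff): stock tf.custom_gradient passes a "variables" keyword argument to the backward function only when the forward pass actually reads tf.Variables, and the backward function must then return a pair of (input gradients, variable gradients). A minimal sketch of that stock-TensorFlow convention, which the wrapper change below threads through to TFP's vjp_bwd:

import tensorflow as tf

v = tf.Variable(3.)

@tf.custom_gradient
def f(x):
  y = x**2 + v**2

  # TF supplies `variables` here only because `v` is read above; the backward
  # function must then return (grads w.r.t. inputs, grads w.r.t. variables).
  def grad_fn(dy, variables=None):
    return 2. * dy * x, [2. * dy * variables[0]]

  return y, grad_fn

x = tf.constant(2.)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = f(x)
dx, dv = tape.gradient(y, (x, v))  # dx == 4.0, dv == 6.0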

File tree

3 files changed: +182 -4 lines changed

tensorflow_probability/python/internal/BUILD

Lines changed: 11 additions & 0 deletions
@@ -233,6 +233,17 @@ multi_substrate_py_library(
     ],
 )
 
+multi_substrate_py_test(
+    name = "custom_gradient_test",
+    srcs = ["custom_gradient_test.py"],
+    deps = [
+        ":custom_gradient",
+        # tensorflow dep,
+        "//tensorflow_probability/python/internal:test_util",
+        "//tensorflow_probability/python/math:gradient",
+    ],
+)
+
 py_test(
     name = "cache_util_test",
     size = "small",

tensorflow_probability/python/internal/custom_gradient.py

Lines changed: 15 additions & 4 deletions
@@ -92,14 +92,25 @@ def f_wrapped(*args, **kwargs):
         args = args[1:]
       val, aux = vjp_fwd(*reconstruct_args, **kwargs)
 
-      def vjp_bwd_wrapped(*g):
+      def vjp_bwd_wrapped(*g, **kwargs):
+        # We don't want to use an explicit `variables` arg, because TF will
+        # complain if the wrapped function doesn't actually have variables
+        # in it. TF will only specify this arg if there are variables.
+        variables = kwargs.get('variables', ())
         nondiff_args = [closure[i] for i in nondiff_argnums]
-        result = tf.nest.flatten(
-            vjp_bwd(*nondiff_args, aux, tf.nest.pack_sequence_as(val, g)))
+        result = vjp_bwd(*nondiff_args, aux,
+                         tf.nest.pack_sequence_as(val, g), **kwargs)
+        if variables:
+          result, variables = result
+        result = tf.nest.flatten(result)
         for i in nondiff_argnums:
          result = tuple(result[:i]) + (None,) + tuple(result[i:])
         result = [a for i, a in enumerate(result) if i not in closure]
-        return tf.nest.pack_sequence_as(args_structure, result)
+        result = tf.nest.pack_sequence_as(args_structure, result)
+        if variables:
+          return result, variables
+        else:
+          return result
 
       return val, vjp_bwd_wrapped

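With this change, a vjp_bwd passed to TFP's custom_gradient may take the same optional variables argument and return an extra list of variable gradients; the new test file below exercises this end to end. A condensed sketch of the resulting user-facing contract (mirroring testVJPWithVariables):

import tensorflow.compat.v2 as tf
from tensorflow_probability.python.internal import custom_gradient

y = tf.Variable(3.)

def f_vjp_fwd(x):
  return x**2 + y**2, x  # (output, auxiliary data saved for the backward pass)

def f_vjp_bwd(x, dz, variables):
  # `variables` is only passed when the forward pass read tf.Variables;
  # return (input gradients, [variable gradients]) in that case.
  return 7. * dz * x, [7. * dz * variables[0]]

@custom_gradient.custom_gradient(vjp_fwd=f_vjp_fwd, vjp_bwd=f_vjp_bwd)
def f(x):
  return f_vjp_fwd(x)[0]
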
tensorflow_probability/python/internal/custom_gradient_test.py

Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
+# Copyright 2021 The TensorFlow Probability Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Tests for custom_gradient."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v2 as tf
+
+from tensorflow_probability.python.internal import custom_gradient
+from tensorflow_probability.python.internal import test_util
+from tensorflow_probability.python.math import gradient as tfp_gradient
+
+JAX_MODE = False
+
+
+@test_util.numpy_disable_gradient_test
+@test_util.test_all_tf_execution_regimes
+class CustomGradientTest(test_util.TestCase):
+
+  def testVJP(self):
+
+    def f_vjp_fwd(x, y):
+      return x**2 + y**2, (x, y)
+
+    def f_vjp_bwd(x_y, dz):
+      x, y = x_y
+      return 7. * dz * x, 7. * dz * y
+
+    @custom_gradient.custom_gradient(
+        vjp_fwd=f_vjp_fwd,
+        vjp_bwd=f_vjp_bwd,
+    )
+    def f(x, y):
+      return f_vjp_fwd(x, y)[0]
+
+    x = tf.constant(2.)
+    y = tf.constant(3.)
+    dz = tf.constant(5.)
+
+    z1 = f(x, y)
+    z2, (dx, dy) = tfp_gradient.value_and_gradient(
+        f, (x, y), output_gradients=dz)
+
+    self.assertAllClose(x**2 + y**2, z1)
+    self.assertAllClose(x**2 + y**2, z2)
+    self.assertAllClose(7. * dz * x, dx)
+    self.assertAllClose(7. * dz * y, dy)
+
+  @test_util.jax_disable_variable_test
+  def testVJPWithVariables(self):
+
+    def f_vjp_fwd(x):
+      return x**2 + y**2, x
+
+    def f_vjp_bwd(x, dz, variables):
+      y = variables[0]
+      return 7. * dz * x, [7. * dz * y]
+
+    @custom_gradient.custom_gradient(
+        vjp_fwd=f_vjp_fwd,
+        vjp_bwd=f_vjp_bwd,
+    )
+    def f(x):
+      return f_vjp_fwd(x)[0]
+
+    x = tf.constant(2.)
+    y = tf.Variable(3.)
+    dz = tf.constant(5.)
+
+    self.evaluate(y.initializer)
+
+    z1 = f(x)
+
+    # Use GradientTape to implicitly capture the variable.
+    with tf.GradientTape() as tape:
+      tape.watch(x)
+      z2 = f(x)
+
+    dx, dy = tape.gradient(z2, (x, y), output_gradients=dz)
+
+    self.assertAllClose(x**2 + y**2, z1)
+    self.assertAllClose(x**2 + y**2, z2)
+    self.assertAllClose(7. * dz * x, dx)
+    self.assertAllClose(7. * dz * y, dy)
+
+  def testJVP(self):
+    if not JAX_MODE:
+      self.skipTest('Custom JVPs are JAX-only.')
+
+    def f_vjp_fwd(x, y):
+      # When a JVP is specified, this function is ignored.
+      raise NotImplementedError()
+
+    def f_vjp_bwd(x_y, dz):
+      # When a JVP is specified, this function is ignored.
+      raise NotImplementedError()
+
+    def f_jvp(x_y, dx_dy):
+      x, y = x_y
+      dx, dy = dx_dy
+      return f(x, y), 7. * (dx * x + dy * y)
+
+    @custom_gradient.custom_gradient(
+        vjp_fwd=f_vjp_fwd,
+        vjp_bwd=f_vjp_bwd,
+        jvp_fn=f_jvp,
+    )
+    def f(x, y):
+      return x**2 + y**2
+
+    x = tf.constant(2.)
+    y = tf.constant(3.)
+    dz = tf.constant(5.)
+
+    z1 = f(x, y)
+    z2, (dx, dy) = tfp_gradient.value_and_gradient(
+        f, (x, y), output_gradients=dz)
+
+    self.assertAllClose(x**2 + y**2, z1)
+    self.assertAllClose(x**2 + y**2, z2)
+    self.assertAllClose(7. * dz * x, dx)
+    self.assertAllClose(7. * dz * y, dy)
+
+    import jax  # pylint: disable=g-import-not-at-top
+
+    z3, dz2 = jax.jvp(f, (x, y), (dx, dy))
+    self.assertAllClose(x**2 + y**2, z3)
+    self.assertAllClose(7. * (dx * x + dy * y), dz2)
+
+  def testPreventGradient(self):
+
+    def f(x):
+      return custom_gradient.prevent_gradient(x, 'No gradient')
+
+    _ = f(1.)
+
+    with self.assertRaisesRegex(LookupError, 'No gradient'):
+      tfp_gradient.value_and_gradient(f, (1.))
+
+
+if __name__ == '__main__':
+  tf.test.main()
