@@ -12,7 +12,7 @@
 import scipy.stats as osp
 
 import jax
-from jax import grad, jacfwd, lax, vmap
+from jax import grad, lax, vmap
 import jax.numpy as jnp
 import jax.random as random
 from jax.scipy.special import expit, logsumexp
@@ -680,48 +680,33 @@ def fn(args):
 
 
 @pytest.mark.parametrize(
-    "jax_dist, sp_dist, params",
+    "jax_dist, params",
     [
-        (dist.Gamma, osp.gamma, (1.0,)),
-        (dist.Gamma, osp.gamma, (0.1,)),
-        (dist.Gamma, osp.gamma, (10.0,)),
-        (dist.Chi2, osp.chi2, (1.0,)),
-        (dist.Chi2, osp.chi2, (0.1,)),
-        (dist.Chi2, osp.chi2, (10.0,)),
-        # TODO: add more test cases for Beta/StudentT (and Dirichlet too) when
-        # their pathwise grad (independent of standard_gamma grad) is implemented.
-        pytest.param(
-            dist.Beta,
-            osp.beta,
-            (1.0, 1.0),
-            marks=pytest.mark.xfail(
-                reason="currently, variance of grad of beta sampler is large"
-            ),
-        ),
-        pytest.param(
-            dist.StudentT,
-            osp.t,
-            (1.0,),
-            marks=pytest.mark.xfail(
-                reason="currently, variance of grad of t sampler is large"
-            ),
-        ),
+        (dist.Gamma, (1.0,)),
+        (dist.Gamma, (0.1,)),
+        (dist.Gamma, (10.0,)),
+        (dist.Chi2, (1.0,)),
+        (dist.Chi2, (0.1,)),
+        (dist.Chi2, (10.0,)),
+        (dist.Beta, (1.0, 1.0)),
+        (dist.StudentT, (5.0, 2.0, 4.0)),
     ],
 )
-def test_pathwise_gradient(jax_dist, sp_dist, params):
+def test_pathwise_gradient(jax_dist, params):
     rng_key = random.PRNGKey(0)
-    N = 100
-    z = jax_dist(*params).sample(key=rng_key, sample_shape=(N,))
-    actual_grad = jacfwd(lambda x: jax_dist(*x).sample(key=rng_key, sample_shape=(N,)))(
-        params
-    )
-    eps = 1e-3
-    for i in range(len(params)):
-        args_lhs = [p if j != i else p - eps for j, p in enumerate(params)]
-        args_rhs = [p if j != i else p + eps for j, p in enumerate(params)]
-        cdf_dot = (sp_dist(*args_rhs).cdf(z) - sp_dist(*args_lhs).cdf(z)) / (2 * eps)
-        expected_grad = -cdf_dot / sp_dist(*params).pdf(z)
-        assert_allclose(actual_grad[i], expected_grad, rtol=0.005)
+    N = 1000000
+
+    def f(params):
+        z = jax_dist(*params).sample(key=rng_key, sample_shape=(N,))
+        return (z + z ** 2).mean(0)
+
+    def g(params):
+        d = jax_dist(*params)
+        return d.mean + d.variance + d.mean ** 2
+
+    actual_grad = grad(f)(params)
+    expected_grad = grad(g)(params)
+    assert_allclose(actual_grad, expected_grad, rtol=0.005)
 
 
 @pytest.mark.parametrize(
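Why the new test works: the removed check differentiated each sample directly with jacfwd and compared against the implicit-reparameterization identity dz/dtheta = -(dF(z; theta)/dtheta) / p(z; theta), which is noisy per sample (hence the xfail marks citing large gradient variance for the Beta and StudentT samplers). The replacement compares expectations instead: since E[z + z**2] = mean + variance + mean**2, the gradient of the Monte Carlo average over N samples must match the gradient of the closed-form moments, and averaging over a million samples drives the variance down enough for a tight rtol. A minimal standalone sketch of that check for Gamma, assuming numpyro's dist.Gamma API (the test file's dist alias); the helper names monte_carlo and analytic are illustrative, not from the test file:

import jax.numpy as jnp
import jax.random as random
from jax import grad

import numpyro.distributions as dist

rng_key = random.PRNGKey(0)
N = 1000000

def monte_carlo(concentration):
    # Sample through jax's differentiable gamma sampler and average z + z**2.
    z = dist.Gamma(concentration).sample(key=rng_key, sample_shape=(N,))
    return jnp.mean(z + z ** 2)

def analytic(concentration):
    # Closed form for E[z + z**2] = mean + variance + mean**2.
    d = dist.Gamma(concentration)
    return d.mean + d.variance + d.mean ** 2

actual_grad = grad(monte_carlo)(1.0)
expected_grad = grad(analytic)(1.0)
# The two gradients agree up to Monte Carlo error.
print(actual_grad, expected_grad)

For a rate-1 Gamma, mean = variance = concentration, so the analytic function is 2a + a**2 and its gradient at concentration 1.0 is 4; the Monte Carlo estimate should match within roughly the test's rtol=0.005 at N = 1000000.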