tensorflow
diff --git a/‎CONTRIBUTING.md
Lines changed: 14 additions & 14 deletions b/‎CONTRIBUTING.md
Lines changed: 14 additions & 14 deletions
diff --git a/‎tensorflow_probability/examples/jupyter_notebooks/Factorial_Mixture.ipynb
Lines changed: 1 addition & 1 deletion b/‎tensorflow_probability/examples/jupyter_notebooks/Factorial_Mixture.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎tensorflow_probability/examples/jupyter_notebooks/Probabilistic_Layers_VAE.ipynb
Lines changed: 2 additions & 2 deletions b/‎tensorflow_probability/examples/jupyter_notebooks/Probabilistic_Layers_VAE.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/‎tensorflow_probability/python/distributions/__init__.py
Lines changed: 48 additions & 47 deletions b/‎tensorflow_probability/python/distributions/__init__.py
Lines changed: 48 additions & 47 deletions
diff --git a/‎tensorflow_probability/python/distributions/distribution_properties_test.py
Lines changed: 15 additions & 8 deletions b/‎tensorflow_probability/python/distributions/distribution_properties_test.py
Lines changed: 15 additions & 8 deletions
diff --git a/‎tensorflow_probability/python/experimental/mcmc/windowed_sampling_test.py
Lines changed: 1 addition & 0 deletions b/‎tensorflow_probability/python/experimental/mcmc/windowed_sampling_test.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎tensorflow_probability/python/experimental/sts_gibbs/gibbs_sampler.py
Lines changed: 3 additions & 3 deletions b/‎tensorflow_probability/python/experimental/sts_gibbs/gibbs_sampler.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎tensorflow_probability/python/experimental/sts_gibbs/gibbs_sampler_test.py
Lines changed: 3 additions & 2 deletions b/‎tensorflow_probability/python/experimental/sts_gibbs/gibbs_sampler_test.py
Lines changed: 3 additions & 2 deletions
diff --git a/‎tensorflow_probability/python/internal/BUILD
Lines changed: 11 additions & 0 deletions b/‎tensorflow_probability/python/internal/BUILD
Lines changed: 11 additions & 0 deletions
diff --git a/‎tensorflow_probability/python/internal/backend/numpy/gen/tensor_shape.py
Lines changed: 11 additions & 1 deletion b/‎tensorflow_probability/python/internal/backend/numpy/gen/tensor_shape.py
Lines changed: 11 additions & 1 deletion
@@ -79,6 +79,20 @@ We strongly recommend running unit tests in an active
 extra bazel flags, so we created a wrapper script, which we suggest using. An
 example invocation (presumed to run from the root of the TFP repo:
 
+#### Dependencies
+
+To run the unit tests, you'll need several packages installed (again, we
+strongly recommend you work in a virtualenv). We include a script to do this for
+you, which also does some sanity checks on the environment:
+
+```shell
+./testing/install_test_dependencies.sh
+```
+
+See the
+[header comments in that script](https://github.com/tensorflow/probability/blob/main/testing/install_test_dependencies.sh)
+for more details.
+
 #### Helper scripts
 
 ```shell
@@ -112,20 +126,6 @@ tfp_test //tensorflow_probability/python/distributions:joint_distribution_corout
 tfp_lints tensorflow_probability/python/distributions/joint_distribution_coroutine.py
 ```
 
-#### Dependencies
-
-To run the unit tests, you'll need several packages installed (again, we
-strongly recommend you work in a virtualenv). We include a script to do this for
-you, which also does some sanity checks on the environtment:
-
-```shell
-./testing/install_test_dependencies.sh
-```
-
-See the
-[header comments in that script](https://github.com/tensorflow/probability/blob/main/testing/install_test_dependencies.sh)
-for more details.
-
 ### Additional considerations
 
 As of early 2020, tensorflow and tf-nightly include GPU support by default,
 
@@ -69,7 +69,7 @@
       },
       "source": [
         "In this notebook we show how to use [TensorFlow Probability](https://github.com/tensorflow/probability) (TFP) to sample from a factorial Mixture of Gaussians distribution defined as:\n",
-        "$$p(x_1, ..., x_n) = \\prod_i p_i(x_i)$$ where: $$\\begin{align*} p_i &\\equiv \\frac{1}{K}\\sum_{i=1}^K \\pi_{ik}\\,\\text{Normal}\\left(\\text{loc}=\\mu_{ik},\\, \\text{scale}=\\sigma_{ik}\\right)\\\\1&=\\sum_{k=1}^K\\pi_{ik}, \\forall i.\\hphantom{MMMMMMMMMMM}\\end{align*}$$\n",
+        "$$p(x_1, ..., x_n) = \\prod_i p_i(x_i)$$ where: $$\\begin{align*} p_i &\\equiv \\frac{1}{K}\\sum_{k=1}^K \\pi_{ik}\\,\\text{Normal}\\left(\\text{loc}=\\mu_{ik},\\, \\text{scale}=\\sigma_{ik}\\right)\\\\1&=\\sum_{k=1}^K\\pi_{ik}, \\forall i.\\hphantom{MMMMMMMMMMM}\\end{align*}$$\n",
         "\n",
         "Each variable $x_i$ is modeled as a mixture of Gaussians, and the joint distribution over all $n$ variables is a product of these densities.\n",
         "\n",
 
@@ -203,12 +203,12 @@
         "train_dataset = (datasets['train']\n",
         "                 .map(_preprocess)\n",
         "                 .batch(256)\n",
-        "                 .prefetch(tf.data.experimental.AUTOTUNE)\n",
+        "                 .prefetch(tf.data.AUTOTUNE)\n",
         "                 .shuffle(int(10e3)))\n",
         "eval_dataset = (datasets['test']\n",
         "                .map(_preprocess)\n",
         "                .batch(256)\n",
-        "                .prefetch(tf.data.experimental.AUTOTUNE))"
+        "                .prefetch(tf.data.AUTOTUNE))"
       ]
     },
     {
 
@@ -166,10 +166,6 @@
 # pylint: enable=line-too-long
 
 __all__ = [
-    'FULLY_REPARAMETERIZED',
-    'NOT_REPARAMETERIZED',
-    'ReparameterizationType',
-    'Distribution',
     'Autoregressive',
     'BatchBroadcast',
     'BatchReshape',
@@ -185,34 +181,38 @@
     'Chi',
     'Chi2',
     'CholeskyLKJ',
+    'DeterminantalPointProcess',
     'Deterministic',
+    'Dirichlet',
+    'DirichletMultinomial',
+    'Distribution',
     'DoublesidedMaxwell',
-    'VectorDeterministic',
-    'DeterminantalPointProcess',
     'Empirical',
-    'ExponentiallyModifiedGaussian',
     'ExpGamma',
     'ExpInverseGamma',
     'Exponential',
-    'VectorExponentialDiag',
+    'ExponentiallyModifiedGaussian',
+    'ExpRelaxedOneHotCategorical',
+    'FiniteDiscrete',
+    'FULLY_REPARAMETERIZED',
     'Gamma',
     'GammaGamma',
-    'InverseGaussian',
+    'GaussianProcess',
+    'GaussianProcessRegressionModel',
+    'GeneralizedExtremeValue',
     'GeneralizedNormal',
     'GeneralizedPareto',
     'Geometric',
-    'GaussianProcess',
-    'GaussianProcessRegressionModel',
-    'VariationalGaussianProcess',
     'Gumbel',
-    'GeneralizedExtremeValue',
     'HalfCauchy',
     'HalfNormal',
     'HalfStudentT',
     'HiddenMarkovModel',
     'Horseshoe',
     'Independent',
+    'independent_joint_distribution_from_structure',
     'InverseGamma',
+    'InverseGaussian',
     'JohnsonSU',
     'JointDistribution',
     'JointDistributionCoroutine',
@@ -221,25 +221,55 @@
     'JointDistributionNamedAutoBatched',
     'JointDistributionSequential',
     'JointDistributionSequentialAutoBatched',
+    'kl_divergence',
     'Kumaraswamy',
     'LambertWDistribution',
     'LambertWNormal',
     'Laplace',
     'LinearGaussianStateSpaceModel',
     'LKJ',
     'Logistic',
+    'LogitNormal',
     'LogLogistic',
     'LogNormal',
-    'LogitNormal',
     'MarkovChain',
+    'Masked',
+    'MatrixNormalLinearOperator',
+    'MatrixTLinearOperator',
+    'Mixture',
+    'MixtureSameFamily',
     'Moyal',
+    'Multinomial',
+    'MultivariateNormalDiag',
+    'MultivariateNormalDiagPlusLowRank',
+    'MultivariateNormalFullCovariance',
+    'MultivariateNormalLinearOperator',
+    'MultivariateNormalTriL',
+    'MultivariateStudentTLinearOperator',
+    'mvn_conjugate_linear_update',
     'NegativeBinomial',
     'Normal',
+    'normal_conjugates_known_scale_posterior',
+    'normal_conjugates_known_scale_predictive',
     'NormalInverseGaussian',
+    'NOT_REPARAMETERIZED',
+    'OneHotCategorical',
+    'OrderedLogistic',
+    'Pareto',
+    'PERT',
     'PixelCNN',
+    'PlackettLuce',
     'Poisson',
     'PoissonLogNormalQuadratureCompound',
+    'PowerSpherical',
     'ProbitBernoulli',
+    'quadrature_scheme_lognormal_gauss_hermite',
+    'quadrature_scheme_lognormal_quantiles',
+    'QuantizedDistribution',
+    'RegisterKL',
+    'RelaxedBernoulli',
+    'RelaxedOneHotCategorical',
+    'ReparameterizationType',
     'Sample',
     'SigmoidBeta',
     'SinhArcsinh',
@@ -248,47 +278,18 @@
     'StoppingRatioLogistic',
     'StudentT',
     'StudentTProcess',
+    'TransformedDistribution',
     'Triangular',
     'TruncatedCauchy',
     'TruncatedNormal',
     'Uniform',
-    'Masked',
-    'MatrixNormalLinearOperator',
-    'MatrixTLinearOperator',
-    'MultivariateNormalDiag',
-    'MultivariateNormalFullCovariance',
-    'MultivariateNormalLinearOperator',
-    'MultivariateNormalTriL',
-    'MultivariateNormalDiagPlusLowRank',
-    'MultivariateStudentTLinearOperator',
-    'Dirichlet',
-    'DirichletMultinomial',
-    'Multinomial',
+    'VariationalGaussianProcess',
+    'VectorDeterministic',
+    'VectorExponentialDiag',
     'VonMises',
     'VonMisesFisher',
     'Weibull',
     'WishartLinearOperator',
     'WishartTriL',
-    'TransformedDistribution',
-    'QuantizedDistribution',
-    'Mixture',
-    'MixtureSameFamily',
-    'ExpRelaxedOneHotCategorical',
-    'OneHotCategorical',
-    'OrderedLogistic',
-    'Pareto',
-    'PERT',
-    'PlackettLuce',
-    'PowerSpherical',
-    'RelaxedBernoulli',
-    'RelaxedOneHotCategorical',
     'Zipf',
-    'kl_divergence',
-    'RegisterKL',
-    'independent_joint_distribution_from_structure',
-    'mvn_conjugate_linear_update',
-    'normal_conjugates_known_scale_posterior',
-    'normal_conjugates_known_scale_predictive',
-    'quadrature_scheme_lognormal_gauss_hermite',
-    'quadrature_scheme_lognormal_quantiles',
 ]
@@ -485,20 +485,23 @@ def _test_slicing(self, data, dist_name, dist):
     # slicing the samples from the original.
     self.assertAllEqual(sliced_samples.shape, sliced_dist_samples.shape)
 
-    # Check that a sliced distribution can compute the log_prob of its own
-    # samples (up to numerical validation errors).
+    # Check that the sliced dist's log_prob agrees with slicing the original's
+    # log_prob.
+    # First, we make sure that the original sample we have passes the
+    # original distribution's validations.  We break the bijector cache here
+    # because slicing will break it later too.
     with tfp_hps.no_tf_rank_errors():
       try:
-        lp = self.evaluate(dist.log_prob(samples))
+        lp = self.evaluate(dist.log_prob(
+            samples + tf.constant(0, dtype=samples.dtype)))
       except tf.errors.InvalidArgumentError:
         # TODO(b/129271256): d.log_prob(d.sample()) should not fail
         #     validate_args checks.
-        # We only tolerate this case for the non-sliced dist.
+        # `return` here passes the example.  If we `hp.assume(False)`
+        # instead, that would demand from Hypothesis that it find many
+        # examples where this check (and the next one) passes;
+        # empirically, it seems to complain that that's too hard.
         return
-      sliced_lp = self.evaluate(sliced_dist.log_prob(sliced_samples))
-
-    # Check that the sliced dist's log_prob agrees with slicing the original's
-    # log_prob.
 
     # This `hp.assume` is suppressing array sizes that cause the sliced and
     # non-sliced distribution to follow different Eigen code paths.  Those
@@ -518,6 +521,10 @@ def _test_slicing(self, data, dist_name, dist):
     hp.note('Non-packetization check {}'.format(all_non_packetized))
     hp.assume(all_packetized or all_non_packetized)
 
+    # Actually evaluate and test the sliced log_prob
+    with tfp_hps.no_tf_rank_errors():
+      sliced_lp = self.evaluate(sliced_dist.log_prob(sliced_samples))
+
     self.assertAllClose(lp[slices], sliced_lp,
                         atol=SLICING_LOGPROB_ATOL[dist_name],
                         rtol=SLICING_LOGPROB_RTOL[dist_name])
 
@@ -621,6 +621,7 @@ def hmc_kwargs(self):
                                   ('nuts_jit_sig', 'nuts'))
   def test_base_kernel(self, kind):
     self.skip_if_no_xla()
+    self.skipTest('b/195070752')
 
     if JAX_MODE:
       input_signature = None
 
@@ -340,9 +340,9 @@ def one_step_predictive(model,
       distribution of each timestep given previous timesteps.
   """
   dtype = dtype_util.common_dtype([
-      posterior_samples.level_scale.dtype,
-      posterior_samples.observation_noise_scale.dtype,
-      posterior_samples.level.dtype,
+      posterior_samples.level_scale,
+      posterior_samples.observation_noise_scale,
+      posterior_samples.level,
       original_mean,
       original_scale], dtype_hint=tf.float32)
   num_observed_steps = prefer_static.shape(posterior_samples.level)[-1]
 
@@ -202,7 +202,7 @@ def reshape_chain_and_sample(x):
 
   @parameterized.named_parameters(
       {'testcase_name': 'float32_xla', 'dtype': tf.float32, 'use_xla': True},
-      {'testcase_name': 'float16', 'dtype': tf.float16, 'use_xla': False})
+      {'testcase_name': 'float64', 'dtype': tf.float64, 'use_xla': False})
   def test_end_to_end_prediction_works_and_is_deterministic(
       self, dtype, use_xla):
     if not tf.executing_eagerly():
@@ -211,7 +211,8 @@ def test_end_to_end_prediction_works_and_is_deterministic(
     model, observed_time_series, is_missing = self._build_test_model(
         num_timesteps=5,
         batch_shape=[3],
-        prior_class=gibbs_sampler.XLACompilableInverseGamma)
+        prior_class=gibbs_sampler.XLACompilableInverseGamma,
+        dtype=dtype)
 
     @tf.function(jit_compile=use_xla)
     def do_sampling(observed_time_series, is_missing):
 
@@ -233,6 +233,17 @@ multi_substrate_py_library(
     ],
 )
 
+multi_substrate_py_test(
+    name = "custom_gradient_test",
+    srcs = ["custom_gradient_test.py"],
+    deps = [
+        ":custom_gradient",
+        # tensorflow dep,
+        "//tensorflow_probability/python/internal:test_util",
+        "//tensorflow_probability/python/math:gradient",
+    ],
+)
+
 py_test(
     name = "cache_util_test",
     size = "small",
 
@@ -213,7 +213,17 @@ def dimension_at_index(shape, index):
 
 @tf_export(v1=["Dimension"])
 class Dimension(object):
-  """Represents the value of one dimension in a TensorShape."""
+  """Represents the value of one dimension in a TensorShape.
+
+  @compatibility(TF2)
+  In TF2, members of a `TensorShape` object are integers. The `Dimension` class
+  is not part of TF2's data model.
+
+  Please refer to the [TensorShape section of the migration guide]
+  (https://www.tensorflow.org/guide/migrate/index#tensorshape) on common code
+  patterns adapting Dimension objects to a TF2 syntax.
+  @end_compatibility
+  """
 
   __slots__ = ["_value"]