Adds mixed precision support to example models.

Johannes Ballé · copybara-github · commit 963aa2d76aa4 · 2022-02-16T13:58:57.000-08:00
- Also fixes a missing cast of `offset` to the prior's dtype in
  continuous_base.py, and updates the corresponding unit test to use a
  non-zero `loc`.

PiperOrigin-RevId: 429133533
Change-Id: If1f70b684b0d63540a4f3100e1a1074195ec4527
diff --git a/models/bls2017.py b/models/bls2017.py
@@ -107,6 +107,7 @@ def call(self, x, training):
     """Computes rate and distortion losses."""
     entropy_model = tfc.ContinuousBatchedEntropyModel(
         self.prior, coding_rank=3, compression=False)
+    x = tf.cast(x, self.compute_dtype)  # TODO(jonycgn): Why is this necessary?
     y = self.analysis_transform(x)
     y_hat, bits = entropy_model(y, training=training)
     x_hat = self.synthesis_transform(y_hat)
@@ -115,6 +116,7 @@ def call(self, x, training):
     bpp = tf.reduce_sum(bits) / num_pixels
     # Mean squared error across pixels.
     mse = tf.reduce_mean(tf.math.squared_difference(x, x_hat))
+    mse = tf.cast(mse, bpp.dtype)
     # The rate-distortion Lagrangian.
     loss = bpp + self.lmbda * mse
     return loss, bpp, mse
@@ -166,7 +168,7 @@ def compress(self, x):
     """Compresses an image."""
     # Add batch dimension and cast to float.
     x = tf.expand_dims(x, 0)
-    x = tf.cast(x, dtype=tf.float32)
+    x = tf.cast(x, dtype=self.compute_dtype)
     y = self.analysis_transform(x)
     # Preserve spatial shapes of both image and latents.
     x_shape = tf.shape(x)[1:-1]
@@ -195,7 +197,7 @@ def check_image_size(image, patchsize):
 
 def crop_image(image, patchsize):
   image = tf.image.random_crop(image, (patchsize, patchsize, 3))
-  return tf.cast(image, tf.float32)
+  return tf.cast(image, tf.keras.mixed_precision.global_policy().compute_dtype)
 
 
 def get_dataset(name, split, args):
@@ -232,6 +234,8 @@ def get_custom_dataset(split, args):
 
 def train(args):
   """Instantiates and trains the model."""
+  if args.precision_policy:
+    tf.keras.mixed_precision.set_global_policy(args.precision_policy)
   if args.check_numerics:
     tf.debugging.enable_check_numerics()
 
@@ -391,6 +395,9 @@ def parse_args(argv):
       "--preprocess_threads", type=int, default=16,
       help="Number of CPU threads to use for parallel decoding of training "
            "images.")
+  train_cmd.add_argument(
+      "--precision_policy", type=str, default=None,
+      help="Policy for `tf.keras.mixed_precision` training.")
   train_cmd.add_argument(
       "--check_numerics", action="store_true",
       help="Enable TF support for catching NaN and Inf in tensors.")
diff --git a/models/bmshj2018.py b/models/bmshj2018.py
@@ -162,6 +162,7 @@ def call(self, x, training):
     side_entropy_model = tfc.ContinuousBatchedEntropyModel(
         self.hyperprior, coding_rank=3, compression=False)
 
+    x = tf.cast(x, self.compute_dtype)  # TODO(jonycgn): Why is this necessary?
     y = self.analysis_transform(x)
     z = self.hyper_analysis_transform(abs(y))
     z_hat, side_bits = side_entropy_model(z, training=training)
@@ -174,6 +175,7 @@ def call(self, x, training):
     bpp = (tf.reduce_sum(bits) + tf.reduce_sum(side_bits)) / num_pixels
     # Mean squared error across pixels.
     mse = tf.reduce_mean(tf.math.squared_difference(x, x_hat))
+    mse = tf.cast(mse, bpp.dtype)
     # The rate-distortion Lagrangian.
     loss = bpp + self.lmbda * mse
     return loss, bpp, mse
@@ -228,7 +230,7 @@ def compress(self, x):
     """Compresses an image."""
     # Add batch dimension and cast to float.
     x = tf.expand_dims(x, 0)
-    x = tf.cast(x, dtype=tf.float32)
+    x = tf.cast(x, dtype=self.compute_dtype)
     y = self.analysis_transform(x)
     z = self.hyper_analysis_transform(abs(y))
     # Preserve spatial shapes of image and latents.
@@ -269,7 +271,7 @@ def check_image_size(image, patchsize):
 
 def crop_image(image, patchsize):
   image = tf.image.random_crop(image, (patchsize, patchsize, 3))
-  return tf.cast(image, tf.float32)
+  return tf.cast(image, tf.keras.mixed_precision.global_policy().compute_dtype)
 
 
 def get_dataset(name, split, args):
@@ -306,6 +308,8 @@ def get_custom_dataset(split, args):
 
 def train(args):
   """Instantiates and trains the model."""
+  if args.precision_policy:
+    tf.keras.mixed_precision.set_global_policy(args.precision_policy)
   if args.check_numerics:
     tf.debugging.enable_check_numerics()
 
@@ -476,6 +480,9 @@ def parse_args(argv):
       "--preprocess_threads", type=int, default=16,
       help="Number of CPU threads to use for parallel decoding of training "
            "images.")
+  train_cmd.add_argument(
+      "--precision_policy", type=str, default=None,
+      help="Policy for `tf.keras.mixed_precision` training.")
   train_cmd.add_argument(
       "--check_numerics", action="store_true",
       help="Enable TF support for catching NaN and Inf in tensors.")
diff --git a/models/ms2020.py b/models/ms2020.py
@@ -199,6 +199,7 @@ def __init__(self, lmbda,
 
   def call(self, x, training):
     """Computes rate and distortion losses."""
+    x = tf.cast(x, self.compute_dtype)  # TODO(jonycgn): Why is this necessary?
     # Build the encoder (analysis) half of the hierarchical autoencoder.
     y = self.analysis_transform(x)
     y_shape = tf.shape(y)[1:-1]
@@ -276,6 +277,7 @@ def call(self, x, training):
     # Mean squared error across pixels.
     # Don't clip or round pixel values while training.
     mse = tf.reduce_mean(tf.math.squared_difference(x, x_hat))
+    mse = tf.cast(mse, total_bpp.dtype)
 
     # Calculate and return the rate-distortion loss: R + lambda * D.
     loss = total_bpp + self.lmbda * mse
@@ -333,7 +335,7 @@ def compress(self, x):
     """Compresses an image."""
     # Add batch dimension and cast to float.
     x = tf.expand_dims(x, 0)
-    x = tf.cast(x, dtype=tf.float32)
+    x = tf.cast(x, dtype=self.compute_dtype)
 
     y_strings = []
     x_shape = tf.shape(x)[1:-1]
@@ -439,7 +441,7 @@ def check_image_size(image, patchsize):
 
 def crop_image(image, patchsize):
   image = tf.image.random_crop(image, (patchsize, patchsize, 3))
-  return tf.cast(image, tf.float32)
+  return tf.cast(image, tf.keras.mixed_precision.global_policy().compute_dtype)
 
 
 def get_dataset(name, split, args):
@@ -476,6 +478,8 @@ def get_custom_dataset(split, args):
 
 def train(args):
   """Instantiates and trains the model."""
+  if args.precision_policy:
+    tf.keras.mixed_precision.set_global_policy(args.precision_policy)
   if args.check_numerics:
     tf.debugging.enable_check_numerics()
 
@@ -661,6 +665,9 @@ def parse_args(argv):
       "--preprocess_threads", type=int, default=16,
       help="Number of CPU threads to use for parallel decoding of training "
            "images.")
+  train_cmd.add_argument(
+      "--precision_policy", type=str, default=None,
+      help="Policy for `tf.keras.mixed_precision` training.")
   train_cmd.add_argument(
       "--check_numerics", action="store_true",
       help="Enable TF support for catching NaN and Inf in tensors.")
diff --git a/tensorflow_compression/python/entropy_models/continuous_base.py b/tensorflow_compression/python/entropy_models/continuous_base.py
@@ -233,8 +233,7 @@ def _build_tables(self, prior, precision, offset=None):
       CDF table, CDF offsets, CDF lengths.
     """
     precision = int(precision)
-    if offset is None:
-      offset = 0.
+    offset = tf.cast(0 if offset is None else offset, prior.dtype)
     # Subclasses should have already caught this, but better be safe.
     assert not prior.event_shape.rank
 
diff --git a/tensorflow_compression/python/entropy_models/continuous_batched_test.py b/tensorflow_compression/python/entropy_models/continuous_batched_test.py
@@ -198,7 +198,7 @@ def test_dtypes_are_correct_with_mixed_precision(self):
     tf.keras.mixed_precision.set_global_policy("mixed_float16")
     try:
       noisy = uniform_noise.NoisyNormal(
-          loc=tf.constant(0, dtype=tf.float64),
+          loc=tf.constant(.5, dtype=tf.float64),
           scale=tf.constant(1, dtype=tf.float64))
       em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
       self.assertEqual(em.bottleneck_dtype, tf.float16)