Adds op for stochastic rounding.

Johannes Ballé · copybara-github · commit 99864d303a4f · 2022-12-01T14:40:11.000-08:00
PiperOrigin-RevId: 492303397
Change-Id: I3d98126948c75e62ca8aed2b85947f530ae03953
diff --git a/tensorflow_compression/cc/kernels/quantization_kernels.cc b/tensorflow_compression/cc/kernels/quantization_kernels.cc
@@ -0,0 +1,111 @@
+/* Copyright 2022 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <random>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/status.h"
+
+namespace tensorflow_compression {
+namespace {
+namespace errors = tensorflow::errors;
+using tensorflow::DEVICE_CPU;
+using tensorflow::OpKernel;
+using tensorflow::OpKernelConstruction;
+using tensorflow::OpKernelContext;
+using tensorflow::Tensor;
+
+// Advances a xoshiro256+ generator by one step and returns its next 64-bit
+// output. Adapted from https://prng.di.unimi.it/xoshiro256plus.c
+//
+// `state` must point to four 64-bit words; all four are updated in place. The
+// returned value is the sum of words 0 and 3 as they were on entry.
+inline uint64_t next_random(uint64_t* state) {
+  const uint64_t result = state[0] + state[3];
+  const uint64_t t = state[1] << 17;  // Taken before state[1] is modified.
+  state[2] ^= state[0];
+  state[3] ^= state[1];
+  state[1] ^= state[2];
+  state[0] ^= state[3];
+  state[2] ^= t;
+  // Rotate state[3] left by 45 bits.
+  state[3] = (state[3] << 45) | (state[3] >> (64 - 45));
+  return result;
+}
+
+// CPU kernel for the `StochasticRound` op.
+//
+// Inputs:
+//   0: tensor of type T. Every element is divided by the step size and the
+//      quotient is rounded stochastically.
+//   1: scalar float32 step size.
+//   2: int32 tensor of any shape, used to seed the random number generator.
+//      If it has no elements, the generator is seeded from the system clock.
+// Output:
+//   0: int32 tensor with the shape of input 0. Each element is the floor of
+//      the quotient, plus one with probability equal to the quotient's
+//      fractional part.
+template <typename T>
+class StochasticRoundOp : public OpKernel {
+ public:
+  explicit StochasticRoundOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& inputs_tensor = context->input(0);
+    auto inputs = inputs_tensor.flat<T>();
+
+    OP_REQUIRES(context, context->input(1).dims() == 0,
+                errors::InvalidArgument("step_size must be a scalar."));
+    // NOTE(review): a step size of zero is not rejected; the quotient below is
+    // then inf/NaN and its conversion to int32 is undefined. Consider
+    // validating the value if callers cannot guarantee it is nonzero.
+    const float step_size = context->input(1).scalar<float>()();
+
+    auto seed = context->input(2).flat<int32_t>();
+
+    Tensor* outputs_tensor = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, inputs_tensor.shape(),
+                                                     &outputs_tensor));
+    auto outputs = outputs_tensor->flat<int32_t>();
+
+    uint64_t random_state[4];
+    // std::seed_seq emits 32-bit words. Collect them in a buffer of that type
+    // and copy the bytes over, rather than writing through a uint32_t* that
+    // aliases the uint64_t array. The resulting state bytes are the same.
+    const auto fill_random_state = [&random_state](std::seed_seq& seq) {
+      uint32_t words[8];
+      static_assert(sizeof(words) == sizeof(random_state),
+                    "seed words must cover the whole generator state");
+      seq.generate(words, words + 8);
+      std::memcpy(random_state, words, sizeof(random_state));
+    };
+
+    if (seed.size() > 0) {
+      std::seed_seq seq(seed.data(), seed.data() + seed.size());
+      fill_random_state(seq);
+    } else {
+      // Seed the random state from system clock, in a best-effort fashion.
+      const uint64_t clock_ticks =
+          std::chrono::high_resolution_clock::now().time_since_epoch().count();
+      // seed_seq keeps the low 32 bits of each entry, so this passes the lower
+      // and upper halves of the tick count as two separate seed words.
+      std::seed_seq seq{clock_ticks, clock_ticks >> 32};
+      fill_random_state(seq);
+    }
+
+    const int64_t num_elements = inputs.size();
+    for (int64_t i = 0; i < num_elements; ++i) {
+      // Promote 16-bit types to 32 bit.
+      const float number = static_cast<float>(inputs(i)) / step_size;
+      const float integral = std::floor(number);
+      // Regardless of T, comparing in float32 is accurate enough here.
+      const float fractional = number - integral;
+      // Keep the top 24 bits of the generator output and scale them to a
+      // float in [0, 1). One value is drawn per element, whether or not the
+      // element ends up being rounded up.
+      const float random = (next_random(random_state) >> 40) * 0x1.0p-24f;
+      int32_t rounded = static_cast<int32_t>(integral);
+      if (random < fractional) {
+        ++rounded;
+      }
+      outputs(i) = rounded;
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("StochasticRound")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<tensorflow::bfloat16>("T"),
+                        StochasticRoundOp<tensorflow::bfloat16>);
+REGISTER_KERNEL_BUILDER(
+    Name("StochasticRound").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
+    StochasticRoundOp<Eigen::half>);
+REGISTER_KERNEL_BUILDER(
+    Name("StochasticRound").Device(DEVICE_CPU).TypeConstraint<float>("T"),
+    StochasticRoundOp<float>);
+
+}  // namespace
+}  // namespace tensorflow_compression
diff --git a/tensorflow_compression/cc/ops/quantization_ops.cc b/tensorflow_compression/cc/ops/quantization_ops.cc
@@ -0,0 +1,47 @@
+/* Copyright 2022 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+
+namespace tensorflow_compression {
+namespace {
+
+// Op definition for `StochasticRound`: three inputs (`inputs` of type T, a
+// float32 `step_size`, an int32 `seed`) and one int32 output whose shape is
+// that of `inputs`.
+//
+// The probabilities in the doc string follow the CPU kernel, which rounds up
+// when a uniform draw from [0, 1) is below the fractional part of x. Rounding
+// up therefore has probability p = x - floor(x), which keeps the expected
+// output equal to x.
+REGISTER_OP("StochasticRound")
+    .Attr("T: {bfloat16, float16, float32}")
+    .Input("inputs: T")
+    .Input("step_size: float32")
+    .Input("seed: int32")
+    .Output("outputs: int32")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Rounds `inputs / step_size` stochastically.
+
+This op computes the elementwise function:
+
+output = {
+  floor(x)       with prob.   1 - p
+  floor(x) + 1   with prob.   p = x - floor(x)
+}
+where x = input / step_size.
+
+inputs: Floating point tensor to be rounded.
+step_size: Scalar tensor. Step size for rounding.
+seed: Arbitrary shape tensor. Seed for random number generator. If it has no
+  elements, seeding is attempted from system time.
+outputs: Integer tensor of same shape as `inputs`, containing rounded values.
+)doc");
+
+}  // namespace
+}  // namespace tensorflow_compression
diff --git a/tensorflow_compression/python/ops/BUILD b/tensorflow_compression/python/ops/BUILD
@@ -63,6 +63,12 @@ py_test(
     deps = [":round_ops"],
 )
 
+# Python test target for the quantization ops; runs quantization_ops_test.py
+# against the :gen_ops target.
+py_test(
+    name = "quantization_ops_test",
+    srcs = ["quantization_ops_test.py"],
+    deps = [":gen_ops"],
+)
+
 filegroup(
     name = "py_src",
     srcs = glob(["*.py"]),
diff --git a/tensorflow_compression/python/ops/gen_ops.py b/tensorflow_compression/python/ops/gen_ops.py
@@ -34,5 +34,6 @@
     "pmf_to_quantized_cdf",
     "run_length_gamma_decode",
     "run_length_gamma_encode",
+    "stochastic_round",
 ]
 # pylint:enable=undefined-all-variable
diff --git a/tensorflow_compression/python/ops/quantization_ops_test.py b/tensorflow_compression/python/ops/quantization_ops_test.py
@@ -0,0 +1,78 @@
+# Copyright 2022 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Quantization tests."""
+
+import time
+from absl.testing import parameterized
+import tensorflow as tf
+from tensorflow_compression.python.ops import gen_ops
+
+
+class QuantizationOpsTest(tf.test.TestCase, parameterized.TestCase):
+  """Python test for quantization ops."""
+
+  @parameterized.parameters(tf.bfloat16, tf.float16, tf.float32)
+  def test_difference_is_at_most_one(self, dtype):
+    values = tf.random.uniform((100,), -100., 100., dtype=dtype)
+    rounded = gen_ops.stochastic_round(values, 1., ())
+    self.assertEqual(rounded.dtype, tf.int32)
+    self.assertAllClose(values, rounded, atol=1, rtol=0)
+
+  def test_identical_seed_yields_identical_output(self):
+    values = tf.random.uniform((100,), -100., 100., dtype=tf.float32)
+    rounded1 = gen_ops.stochastic_round(values, 1., (123, 456))
+    self.assertEqual(rounded1.dtype, tf.int32)
+    rounded2 = gen_ops.stochastic_round(values, 1., (123, 456))
+    self.assertEqual(rounded2.dtype, tf.int32)
+    rounded3 = gen_ops.stochastic_round(values, 1., (456, 789))
+    self.assertEqual(rounded3.dtype, tf.int32)
+    self.assertAllEqual(rounded1, rounded2)
+    self.assertNotAllEqual(rounded1, rounded3)
+
+  def test_clock_seed_yields_different_output(self):
+    values = tf.random.uniform((100,), -100., 100., dtype=tf.float32)
+    rounded1 = gen_ops.stochastic_round(values, 1., ())
+    self.assertEqual(rounded1.dtype, tf.int32)
+    time.sleep(1.)  # Ensure even on a low-resolution clock, we change seed.
+    rounded2 = gen_ops.stochastic_round(values, 1., ())
+    self.assertEqual(rounded2.dtype, tf.int32)
+    self.assertNotAllEqual(rounded1, rounded2)
+
+  @parameterized.parameters(1., .75, 1e-4)
+  def test_rounding_is_deterministic_at_integers(self, step_size):
+    values = tf.random.uniform((100,), -100, 100, dtype=tf.int32)
+    rounded = gen_ops.stochastic_round(
+        step_size * tf.cast(values, tf.float32), step_size, ())
+    self.assertEqual(rounded.dtype, tf.int32)
+    self.assertAllEqual(values, rounded)
+
+  @parameterized.parameters(1., .75, 1e-4)
+  def test_difference_at_half_integers_is_at_most_one_half(self, step_size):
+    values = tf.range(-10, 10, dtype=tf.float32) + .5
+    rounded = gen_ops.stochastic_round(step_size * values, step_size, ())
+    self.assertEqual(rounded.dtype, tf.int32)
+    self.assertAllClose(values, rounded, atol=.5, rtol=0)
+
+  def test_rounding_is_unbiased(self):
+    values = tf.random.uniform((20,), -100., 100., dtype=tf.float32)
+    replicated = tf.broadcast_to(values, (100000, 20))
+    rounded = gen_ops.stochastic_round(replicated, 1., ())
+    self.assertEqual(rounded.dtype, tf.int32)
+    averaged = tf.reduce_mean(tf.cast(rounded, tf.float32), axis=0)
+    self.assertAllClose(values, averaged, atol=5e-3, rtol=0)
+
+
+if __name__ == "__main__":
+  tf.test.main()

Original file line number	Diff line number	Diff line change
`@@ -34,5 +34,6 @@`
`34`	`34`	`"pmf_to_quantized_cdf",`
`35`	`35`	`"run_length_gamma_decode",`
`36`	`36`	`"run_length_gamma_encode",`
	`37`	`+ "stochastic_round",`
`37`	`38`	`]`
`38`	`39`	`# pylint:enable=undefined-all-variable`