Adds ArrayFingerprint and CheckArrayFingerprint ops.

jballe · Johannes Ballé · commit 5e98dfd5c4d5 · 2019-04-08T18:36:11.000-07:00
PiperOrigin-RevId: 242570134
diff --git a/cc/kernels/range_coding_helper_kernels.cc b/cc/kernels/range_coding_helper_kernels.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -37,19 +38,20 @@ namespace {
 namespace gtl = tensorflow::gtl;
 namespace thread = tensorflow::thread;
 using tensorflow::DEVICE_CPU;
+using tensorflow::Fingerprint64;
+using tensorflow::int32;
+using tensorflow::int64;
 using tensorflow::OpKernel;
 using tensorflow::OpKernelConstruction;
 using tensorflow::OpKernelContext;
+using tensorflow::string;
 using tensorflow::Tensor;
 using tensorflow::TensorShape;
 using tensorflow::TensorShapeUtils;
-using tensorflow::errors::InvalidArgument;
-using tensorflow::int32;
-using tensorflow::int64;
-using tensorflow::string;
-using tensorflow::uint8;
 using tensorflow::uint32;
 using tensorflow::uint64;
+using tensorflow::uint8;
+using tensorflow::errors::InvalidArgument;
 
 class PmfToCdfOp : public OpKernel {
  public:
@@ -208,5 +210,62 @@ class PmfToCdfOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name("PmfToQuantizedCdf").Device(DEVICE_CPU),
                         PmfToCdfOp);
 
+class ArrayFingerprintOp : public tensorflow::OpKernel {  // Kernel that writes a fingerprint of the input tensor's bytes to a scalar output.
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(tensorflow::OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, tensorflow::DataTypeCanUseMemcpy(input.dtype()),  // Fail with InvalidArgument unless the dtype passes DataTypeCanUseMemcpy.
+                InvalidArgument("Data type not supported: ",
+                                tensorflow::DataTypeString(input.dtype())));
+
+    const int64 size =  // Byte length: number of elements times DataTypeSize of the dtype.
+        input.shape().num_elements() * tensorflow::DataTypeSize(input.dtype());
+    auto data = input.bit_casted_shaped<char, 1>({size});  // Rank-1 char view over the input with `size` entries.
+
+    Tensor* output;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape{}, &output));  // Output 0 has an empty (scalar) shape.
+
+    output->scalar<int64>()() =  // The Fingerprint64 result is stored in the int64 scalar output.
+        Fingerprint64({data.data(), static_cast<size_t>(data.size())});
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("ArrayFingerprint").Device(tensorflow::DEVICE_CPU),  // Binds the "ArrayFingerprint" op to this kernel on CPU.
+                        ArrayFingerprintOp);
+
+class CheckArrayFingerprintOp : public tensorflow::OpKernel {  // Kernel that recomputes the input's fingerprint, compares it to a given value, and forwards the input.
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(tensorflow::OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    const Tensor& fingerprint = context->input(1);  // Expected fingerprint value supplied by the caller.
+    OP_REQUIRES(context, tensorflow::DataTypeCanUseMemcpy(input.dtype()),  // Fail with InvalidArgument unless the dtype passes DataTypeCanUseMemcpy.
+                InvalidArgument("Data type not supported: ",
+                                tensorflow::DataTypeString(input.dtype())));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(fingerprint.shape()),  // The expected fingerprint must be a scalar tensor.
+                InvalidArgument("`fingerprint` should be a scalar"));
+
+    const int64 size =  // Byte length: number of elements times DataTypeSize of the dtype.
+        input.shape().num_elements() * tensorflow::DataTypeSize(input.dtype());
+    auto data = input.bit_casted_shaped<char, 1>({size});  // Rank-1 char view over the input with `size` entries.
+
+    OP_REQUIRES(  // Fail with a DataLoss error when the recomputed fingerprint differs from the expected one.
+        context,
+        fingerprint.scalar<int64>()() ==
+            Fingerprint64({data.data(), static_cast<size_t>(data.size())}),
+        tensorflow::errors::DataLoss("Fingerprint mismatch"));
+
+    context->set_output(0, input);  // On success, output 0 is the input tensor itself.
+  }
+};
+
+REGISTER_KERNEL_BUILDER(  // Binds the "CheckArrayFingerprint" op to this kernel on CPU.
+    Name("CheckArrayFingerprint").Device(tensorflow::DEVICE_CPU),
+    CheckArrayFingerprintOp);
+
 }  // namespace
 }  // namespace tensorflow_compression
diff --git a/cc/kernels/range_coding_helper_kernels_test.cc b/cc/kernels/range_coding_helper_kernels_test.cc
@@ -40,6 +40,7 @@ using tensorflow::NodeDefBuilder;
 using tensorflow::OpsTestBase;
 using tensorflow::ShapeInferenceTestOp;
 using tensorflow::Tensor;
+using tensorflow::TensorShape;
 using tensorflow::TTypes;
 
 class PmfToQuantizedCdfOpTest : public OpsTestBase {
@@ -151,6 +152,87 @@ TEST_F(PmfToQuantizedCdfOpTest, ShapeFn) {
   INFER_OK(op, "[3,4,5]", "[d0_0,d0_1,6]");
 }
 
+class FingerprintOpTest : public tensorflow::OpsTestBase {  // Fixture with helpers that build the two fingerprint ops and set their inputs.
+ protected:
+  void MakeFingerprintOp(Tensor* tensor) {  // Sets up an "ArrayFingerprint" node whose single input is `tensor`.
+    TF_ASSERT_OK(tensorflow::NodeDefBuilder("fingerprint", "ArrayFingerprint")
+                     .Input(tensorflow::FakeInput(tensor->dtype()))  // Input dtype follows the tensor's dtype.
+                     .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+
+    inputs_.clear();  // Drop inputs left over from a previously built op.
+    inputs_.emplace_back(tensor);
+  }
+
+  void MakeCheckFingerprintOp(Tensor* tensor, Tensor* fingerprint) {  // Sets up a "CheckArrayFingerprint" node with inputs `tensor` and `fingerprint`.
+    TF_ASSERT_OK(
+        tensorflow::NodeDefBuilder("check_fingerprint", "CheckArrayFingerprint")
+            .Input(tensorflow::FakeInput(tensor->dtype()))
+            .Input(tensorflow::FakeInput(fingerprint->dtype()))
+            .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+
+    inputs_.clear();  // Drop inputs left over from a previously built op.
+    inputs_.emplace_back(tensor);  // Input 0: data tensor.
+    inputs_.emplace_back(fingerprint);  // Input 1: expected fingerprint.
+  }
+};
+
+TEST_F(FingerprintOpTest, Verify) {  // Round trip: fingerprint a random tensor, check it passes, then corrupt one byte and expect failure.
+  std::random_device rd;
+  random::PhiloxRandom gen(rd(), rd());  // Seeded from std::random_device, so inputs differ between runs.
+  random::SimplePhilox rand(&gen);
+  for (tensorflow::DataType dtype : tensorflow::kRealNumberTypes) {  // One iteration per dtype in kRealNumberTypes.
+    const int rank = rand.Uniform(4);  // NOTE(review): presumably a rank in [0, 4) — confirm Uniform's range.
+
+    TensorShape shape;
+    for (int i = 0; i < rank; ++i) {
+      shape.AddDim(rand.Uniform(9) + 1);  // The "+ 1" presumably keeps every dimension non-zero — confirm.
+    }
+
+    Tensor tensor(dtype, shape);
+
+    const int64 length = shape.num_elements() * tensorflow::DataTypeSize(dtype);  // Byte length of the tensor contents.
+    auto buffer = tensor.bit_casted_shaped<char, 1>({length});  // Rank-1 char view used to fill and later modify the tensor.
+    buffer.setRandom();
+
+    MakeFingerprintOp(&tensor);
+    TF_ASSERT_OK(RunOpKernel());
+
+    Tensor fingerprint = *GetOutput(0);  // Copy the output tensor before the next op is built.
+
+    MakeCheckFingerprintOp(&tensor, &fingerprint);
+    TF_ASSERT_OK(RunOpKernel());  // Unmodified data must pass the check.
+
+    // Change one byte in the buffer.
+    const int64 pos = rand.Uniform(length);
+    buffer(pos) = ~buffer(pos);  // Bitwise complement guarantees the byte value changes.
+
+    MakeCheckFingerprintOp(&tensor, &fingerprint);
+    ASSERT_FALSE(RunOpKernel().ok());  // Modified data must be rejected.
+  }
+}
+
+TEST_F(FingerprintOpTest, FingerprintShapeFn) {  // Shape inference for ArrayFingerprint: output is "[]" for every input shape tried.
+  tensorflow::ShapeInferenceTestOp op("ArrayFingerprint");
+
+  INFER_OK(op, "?", "[]");  // Unknown input shape.
+  INFER_OK(op, "[]", "[]");  // Scalar input.
+  INFER_OK(op, "[1]", "[]");
+  INFER_OK(op, "[1,2]", "[]");
+  INFER_OK(op, "[1,2,3]", "[]");
+}
+
+TEST_F(FingerprintOpTest, CheckFingerprintShapeFn) {  // Shape inference for CheckArrayFingerprint: output matches input 0; input 1 must be rank 0.
+  tensorflow::ShapeInferenceTestOp op("CheckArrayFingerprint");
+
+  INFER_OK(op, "?;?", "in0");  // "in0" means the output shape is that of the first input.
+  INFER_OK(op, "[];?", "in0");
+  INFER_OK(op, "[1,2];?", "in0");
+  INFER_OK(op, "[1,2,3];?", "in0");
+  INFER_ERROR("rank 0", op, "?;[1]");  // A rank-1 fingerprint input is expected to produce an error mentioning "rank 0".
+}
+
 }  // namespace
 }  // namespace tensorflow_compression
 
diff --git a/cc/ops/range_coding_ops.cc b/cc/ops/range_coding_ops.cc
@@ -87,7 +87,6 @@ encoded: A range-coded scalar string.
 precision: The number of bits for probability quantization. Must be <= 16.
 )doc");
 
-
 REGISTER_OP("RangeDecode")
     .Input("encoded: string")
     .Input("shape: int32")
@@ -120,7 +119,6 @@ precision: The number of bits for probability quantization. Must be <= 16, and
   must match the precision used by RangeEncode that produced `encoded`.
 )doc");
 
-
 REGISTER_OP("UnboundedIndexRangeEncode")
     .Input("data: int32")
     .Input("index: int32")
@@ -198,7 +196,6 @@ overflow_width: The bit width of the variable-length overflow code. Must be <=
   precision.
 )doc");
 
-
 REGISTER_OP("UnboundedIndexRangeDecode")
     .Input("encoded: string")
     .Input("index: int32")
@@ -239,7 +236,6 @@ overflow_width: The bit width of the variable-length overflow code. Must be <=
   produced `encoded`.
 )doc");
 
-
 REGISTER_OP("PmfToQuantizedCdf")
     .Input("pmf: float")
     .Output("cdf: int32")
@@ -268,6 +264,38 @@ Note that the input PMF is pre-quantization. The input PMF is not normalized
 by this op prior to quantization. Therefore the user is responsible for
 normalizing PMF if necessary.
 )doc");
+
+REGISTER_OP("ArrayFingerprint")  // Op definition: tensor of type T in, int64 fingerprint out.
+    .Input("input: T")
+    .Output("fingerprint: int64")
+    .Attr("T: realnumbertype")  // T is restricted to the "realnumbertype" set.
+    .SetShapeFn(tensorflow::shape_inference::ScalarShape)  // Shape function named ScalarShape; the output is declared scalar.
+    .Doc(R"doc(
+Produces fingerprint of the input data.
+
+input: Tensor to be fingerprinted.
+fingerprint: Fingerprint value of input.
+)doc");
+
+REGISTER_OP("CheckArrayFingerprint")  // Op definition: tensor of type T plus an int64 fingerprint in, tensor of type T out.
+    .Input("input: T")
+    .Input("fingerprint: int64")
+    .Output("output: T")
+    .Attr("T: realnumbertype")  // T is restricted to the "realnumbertype" set.
+    .SetShapeFn([](InferenceContext* c) {  // Shape function: validate input 1's rank, then forward input 0's shape.
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // `fingerprint` must have rank 0.
+      c->set_output(0, c->input(0));  // Output shape is the shape of `input`.
+      return tensorflow::Status::OK();
+    })
+    .Doc(R"doc(
+Computes the fingerprint of `input` and checks the computed value against
+`fingerprint`. If the check fails, then this op returns an error status.
+
+input: Tensor to be fingerprinted and checked.
+fingerprint: Fingerprint value to be checked against.
+output: The same as input.
+)doc");
 // clang-format on
 
 }  // namespace