Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 54e122c

Browse files
authored
quantized transpose operator (#20817)
* add quantized transpose
* add quantized transpose test
* add license to new files
* check support, fix warnings
* remove inplace quantized transpose
* fix inplace quantized transpose
* add quantized transpose test
* separate transpose ops
* review fixes
* add fallbacks for NP and ND
* fix formatting
* fix formatting
* update np transpose test
* add operators to amp lists
* add macro operator creation
* add tests
* review fix
* remove relative includes
1 parent 6c2b3dc commit 54e122c

File tree

9 files changed

+367
-60
lines changed

9 files changed

+367
-60
lines changed

python/mxnet/amp/lists/symbol_fp16.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@
108108
'_contrib_quantized_elemwise_add',
109109
'_contrib_quantized_act',
110110
'_contrib_quantized_reshape',
111+
'_contrib_quantized_transpose',
111112
'_npx_quantized_reshape',
113+
'_npx_quantized_transpose',
112114
'_image_crop',
113115
'_linspace',
114116
'_contrib_requantize',

src/operator/nn/dnnl/dnnl_transpose.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ DNNLTransposeFwd::DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArr
7171
}
7272

7373
dnnl_memory_desc_t dst_fmt;
74-
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);
74+
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, get_dnnl_type_t(data.dtype()), strides);
7575

7676
dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
7777
out_ = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);

src/operator/numpy/np_matrix_op-inl.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <vector>
2828
#include <algorithm>
2929
#include <string>
30+
#include <set>
3031
#include <utility>
3132
#include <unordered_map>
3233
#include "../tensor/matrix_op-inl.h"
@@ -175,6 +176,66 @@ void NumpyTranspose(const nnvm::NodeAttrs& attrs,
175176
}
176177
}
177178

179+
/*!
 * \brief Bidirectional shape inference for numpy-style transpose.
 *
 * Deduces the output shape from a known input shape (`ret`) and,
 * symmetrically, the input shape from a known output shape (`get`),
 * honouring `param.axes` when given; with no axes, numpy semantics
 * reverse all axes.
 *
 * \param attrs     op attributes; `parsed` holds NumpyTransposeParam
 * \param in_attrs  one entry: data shape (may be partially unknown)
 * \param out_attrs one entry: output shape (may be partially unknown)
 * \return true once both input and output shapes are fully known
 */
inline bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
                                mxnet::ShapeVector* in_attrs,
                                mxnet::ShapeVector* out_attrs) {
  const NumpyTransposeParam& param = nnvm::get<NumpyTransposeParam>(attrs.parsed);
  CHECK_EQ(in_attrs->size(), 1U);
  CHECK_EQ(out_attrs->size(), 1U);
  mxnet::TShape& shp = (*in_attrs)[0];
  mxnet::TShape& out_shp = (*out_attrs)[0];

  // Pick the rank from whichever side is already known.
  int ndim = -1;
  if (ndim_is_known(shp)) {
    ndim = shp.ndim();
  } else if (ndim_is_known(out_shp)) {
    ndim = out_shp.ndim();
  }
  if (ndim < 0) {
    return false;  // neither side known yet — defer inference
  }
  if (out_shp.ndim() >= 0 && shp.ndim() >= 0) {
    CHECK_EQ(out_shp.ndim(), shp.ndim());
  }

  mxnet::TShape get(ndim, -1);  // input shape deduced from the output
  mxnet::TShape ret(ndim, -1);  // output shape deduced from the input

  if (ndim_is_known(param.axes)) {
    CHECK_EQ(ndim, param.axes.ndim())
        << "The number of axes does not match the dimension of the tensor. axes = " << param.axes
        << ", input tensor shape = " << shp;
    mxnet::TShape axes = common::CanonicalizeAxes(param.axes);
    // A std::set collapses duplicates, so a size mismatch means repeats.
    std::set<dim_t> axes_set(axes.begin(), axes.end());
    CHECK_EQ(axes_set.size(), axes.ndim()) << "ValueError: Repeated axis in transpose."
                                           << " param.axes = " << param.axes;
    if (ndim_is_known(shp)) {
      for (int i = 0; i < ndim; ++i) {
        ret[i] = shp[axes[i]];
      }
    }
    if (ndim_is_known(out_shp)) {
      for (int i = 0; i < ndim; ++i) {
        get[axes[i]] = out_shp[i];
      }
    }
  } else {
    // No axes given: reverse all axes (numpy default).
    if (ndim_is_known(shp)) {
      for (int i = 0; i < ndim; ++i) {
        ret[i] = shp[ndim - 1 - i];
      }
    }
    if (ndim_is_known(out_shp)) {
      for (int i = 0; i < ndim; ++i) {
        get[ndim - 1 - i] = out_shp[i];
      }
    }
  }
  SHAPE_ASSIGN_CHECK(*in_attrs, 0, get);
  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
  return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
}
238+
178239
template <typename xpu>
179240
void NumpyColumnStackForward(const nnvm::NodeAttrs& attrs,
180241
const OpContext& ctx,

src/operator/numpy/np_matrix_op.cc

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -45,65 +45,6 @@ DMLC_REGISTER_PARAMETER(NumpyDiagParam);
4545
DMLC_REGISTER_PARAMETER(NumpyDiagonalParam);
4646
DMLC_REGISTER_PARAMETER(NumpyDiagflatParam);
4747

48-
bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
49-
mxnet::ShapeVector* in_attrs,
50-
mxnet::ShapeVector* out_attrs) {
51-
const NumpyTransposeParam& param = nnvm::get<NumpyTransposeParam>(attrs.parsed);
52-
CHECK_EQ(in_attrs->size(), 1U);
53-
CHECK_EQ(out_attrs->size(), 1U);
54-
mxnet::TShape& shp = (*in_attrs)[0];
55-
mxnet::TShape& out_shp = (*out_attrs)[0];
56-
57-
int ndim = -1;
58-
if (ndim_is_known(shp)) {
59-
ndim = shp.ndim();
60-
} else if (ndim_is_known(out_shp)) {
61-
ndim = out_shp.ndim();
62-
}
63-
if (ndim < 0) {
64-
return false;
65-
}
66-
if (out_shp.ndim() >= 0 && shp.ndim() >= 0) {
67-
CHECK_EQ(out_shp.ndim(), shp.ndim());
68-
}
69-
70-
mxnet::TShape get(ndim, -1);
71-
mxnet::TShape ret(ndim, -1);
72-
73-
if (ndim_is_known(param.axes)) {
74-
CHECK_EQ(ndim, param.axes.ndim())
75-
<< "The number of axes does not match the dimension of the tensor. axes = " << param.axes
76-
<< ", input tensor shape = " << shp;
77-
mxnet::TShape axes = common::CanonicalizeAxes(param.axes);
78-
std::set<dim_t> axes_set(axes.begin(), axes.end());
79-
CHECK_EQ(axes_set.size(), axes.ndim()) << "ValueError: Repeated axis in transpose."
80-
<< " param.axes = " << param.axes;
81-
if (ndim_is_known(shp)) {
82-
for (int i = 0; i < ndim; ++i) {
83-
ret[i] = shp[axes[i]];
84-
}
85-
}
86-
if (ndim_is_known(out_shp)) {
87-
for (int i = 0; i < ndim; ++i) {
88-
get[axes[i]] = out_shp[i];
89-
}
90-
}
91-
} else {
92-
if (ndim_is_known(shp)) {
93-
for (int i = 0; i < ndim; ++i) {
94-
ret[i] = shp[ndim - 1 - i];
95-
}
96-
}
97-
if (ndim_is_known(out_shp)) {
98-
for (int i = 0; i < ndim; ++i) {
99-
get[ndim - 1 - i] = out_shp[i];
100-
}
101-
}
102-
}
103-
SHAPE_ASSIGN_CHECK(*in_attrs, 0, get);
104-
SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
105-
return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
106-
}
10748
#if MXNET_USE_ONEDNN == 1
10849

10950
static void NumpyTransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
2+
/*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*/
20+
21+
/*!
22+
* \file dnnl_quantized_transpose.cc
23+
* \author: Rafal Litka, rafal.litka@intel.com
24+
*/
25+
#if MXNET_USE_ONEDNN == 1
26+
#include "operator/numpy/np_matrix_op-inl.h"
27+
#include "operator/tensor/matrix_op-inl.h"
28+
#include "operator/nn/dnnl/dnnl_transpose-inl.h"
29+
30+
namespace mxnet {
31+
namespace op {
32+
33+
/*!
 * \brief Storage-type inference for quantized transpose (DNNL path).
 *
 * Both sides carry three entries (data, min, max); the actual dispatch
 * decision is delegated to the common DNNLStorageType helper.
 */
inline static bool QuantizedTransposeStorageType(const nnvm::NodeAttrs& attrs,
                                                 const int dev_mask,
                                                 DispatchMode* dispatch_mode,
                                                 std::vector<int>* in_attrs,
                                                 std::vector<int>* out_attrs) {
  CHECK_EQ(in_attrs->size(), 3U);
  CHECK_EQ(out_attrs->size(), 3U);
  return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
42+
43+
bool SupportDNNLQuantizedTranspose(const NDArray& data) {
44+
auto data_ndim = data.shape().ndim();
45+
46+
if (data_ndim > 4 || data_ndim == 0 || data.shape().Size() == 0)
47+
return false;
48+
49+
return true;
50+
}
51+
// Function-pointer type of a TBlob-based transpose implementation, used as the
// CPU fallback when DNNL cannot handle the input.
typedef void (*TransposeFallbackFunAny)(const nnvm::NodeAttrs&,
                                        const OpContext&,
                                        const std::vector<TBlob>&,
                                        const std::vector<OpReqType>&,
                                        const std::vector<TBlob>&);

/*!
 * \brief Forward computation for quantized transpose.
 *
 * inputs  = {data, min_data, max_data}; outputs = {output, min_output, max_output}.
 * Dispatches the data tensor to the DNNL primitive when supported, otherwise
 * to the plain TBlob fallback. The min/max scalars are copied through
 * unchanged — transposing does not alter the value range.
 *
 * \tparam ParamType         TransposeParam or NumpyTransposeParam
 * \tparam TransposeFallback TBlob-based fallback implementation
 */
template <class ParamType, TransposeFallbackFunAny TransposeFallback>
static void DNNLQuantizedTransposeForward(const nnvm::NodeAttrs& attrs,
                                          const OpContext& ctx,
                                          const std::vector<NDArray>& inputs,
                                          const std::vector<OpReqType>& req,
                                          const std::vector<NDArray>& outputs) {
  CHECK(inputs[0].dtype() == mshadow::kUint8 || inputs[0].dtype() == mshadow::kInt8)
      << "dnnl_quantized_transpose only supports uint8 and int8 as input type";
  if (req[0] == kNullOp) {
    return;  // nothing requested for the data output — skip all work
  }
  CHECK_EQ(inputs.size(), 3U);
  CHECK_EQ(outputs.size(), 3U);
  if (SupportDNNLQuantizedTranspose(inputs[0])) {
    DNNLRun(DNNLTransposeForward<ParamType>, attrs, ctx, inputs[0], req[0], outputs[0]);
  } else {
    FallBackCompute(TransposeFallback, attrs, ctx, inputs, req, outputs);
  }
  // Forward the quantization range scalars as-is.
  outputs[1].data().dptr<float>()[0] = inputs[1].data().dptr<float>()[0];
  outputs[2].data().dptr<float>()[0] = inputs[2].data().dptr<float>()[0];
}
78+
79+
// DNNL-backed attribute registrations for both transpose flavours. Each
// requests temp space (needed by the fallback path) and is flagged TIsDNNL.
NNVM_REGISTER_OP(_npx_quantized_transpose)
    .set_attr<FInferStorageType>("FInferStorageType", QuantizedTransposeStorageType)
    .set_attr<FResourceRequest>("FResourceRequest",
                                [](const NodeAttrs& n) {
                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
                                })
    .set_attr<FComputeEx>("FComputeEx<cpu>",
                          DNNLQuantizedTransposeForward<NumpyTransposeParam, NumpyTranspose<cpu>>)
    .set_attr<bool>("TIsDNNL", true);

NNVM_REGISTER_OP(_contrib_quantized_transpose)
    .set_attr<FInferStorageType>("FInferStorageType", QuantizedTransposeStorageType)
    .set_attr<FResourceRequest>("FResourceRequest",
                                [](const NodeAttrs& n) {
                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
                                })
    .set_attr<FComputeEx>("FComputeEx<cpu>",
                          DNNLQuantizedTransposeForward<TransposeParam, Transpose<cpu>>)
    .set_attr<bool>("TIsDNNL", true);
98+
99+
} // namespace op
100+
} // namespace mxnet
101+
102+
#endif // MXNET_USE_ONEDNN == 1
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
/*!
21+
* \file quantized_transpose.cc
22+
* \author: Rafal Litka, rafal.litka@intel.com
23+
*/
24+
#include <mxnet/op_attr_types.h>
25+
#include "../tensor/matrix_op-inl.h"
26+
#include "../numpy/np_matrix_op-inl.h"
27+
28+
namespace mxnet {
29+
namespace op {
30+
31+
/*!
 * \brief Type inference for quantized transpose.
 *
 * Slot 0 is the quantized payload (its dtype is propagated input -> output);
 * slots 1 and 2 are the fp32 min/max scalars on both sides.
 * \return true once the payload dtype is known.
 */
inline bool QuantizedTransposeType(const nnvm::NodeAttrs& attrs,
                                   std::vector<int>* in_attrs,
                                   std::vector<int>* out_attrs) {
  CHECK_EQ(in_attrs->size(), 3U);
  CHECK_EQ(out_attrs->size(), 3U);
  // min/max inputs and outputs are always fp32 scalars.
  TYPE_ASSIGN_CHECK(*in_attrs, 1, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*in_attrs, 2, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*out_attrs, 2, mshadow::kFloat32);
  // The transposed output keeps the input's quantized dtype.
  TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
  return (*in_attrs)[0] != -1;
}
43+
44+
typedef bool (*TransposeShapeFunAny)(const nnvm::NodeAttrs&,
45+
mxnet::ShapeVector*,
46+
mxnet::ShapeVector*);
47+
48+
template <TransposeShapeFunAny TransposeShapeFun>
49+
inline bool QuantizedTransposeShape(const nnvm::NodeAttrs& attrs,
50+
mxnet::ShapeVector* in_attrs,
51+
mxnet::ShapeVector* out_attrs) {
52+
CHECK_EQ(in_attrs->size(), 3U);
53+
CHECK_EQ(out_attrs->size(), 3U);
54+
mxnet::ShapeVector qin_attrs(1);
55+
mxnet::ShapeVector qout_attrs(1);
56+
SHAPE_ASSIGN_CHECK(qin_attrs, 0, (*in_attrs)[0]);
57+
SHAPE_ASSIGN_CHECK(qout_attrs, 0, (*out_attrs)[0]);
58+
bool ret = TransposeShapeFun(attrs, &qin_attrs, &qout_attrs);
59+
SHAPE_ASSIGN_CHECK(*in_attrs, 0, qin_attrs[0]);
60+
SHAPE_ASSIGN_CHECK(*out_attrs, 0, qout_attrs[0]);
61+
SHAPE_ASSIGN_CHECK(*in_attrs, 1, mxnet::TShape{1});
62+
SHAPE_ASSIGN_CHECK(*in_attrs, 2, mxnet::TShape{1});
63+
SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
64+
SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
65+
return ret;
66+
}
67+
68+
// Registers a quantized transpose operator with the I/O conventions shared by
// the classic and numpy flavours:
//   inputs  : data, min_data, max_data
//   outputs : output, min_output, max_output
// Flavour-specific bits (param parser, shape function) are appended by the
// caller after the macro invocation.
#define MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(name)                                    \
  NNVM_REGISTER_OP(name)                                                                     \
      .set_num_inputs(3)                                                                     \
      .set_num_outputs(3)                                                                    \
      .set_attr<nnvm::FInferType>("FInferType", QuantizedTransposeType)                      \
      .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)                             \
      .set_attr<nnvm::FListInputNames>(                                                      \
          "FListInputNames",                                                                 \
          [](const NodeAttrs& attrs) {                                                       \
            return std::vector<std::string>{"data", "min_data", "max_data"};                 \
          })                                                                                 \
      .set_attr<nnvm::FListOutputNames>(                                                     \
          "FListOutputNames",                                                                \
          [](const NodeAttrs& attrs) {                                                       \
            return std::vector<std::string>{"output", "min_output", "max_output"};           \
          })                                                                                 \
      .set_attr<FQuantizable>("FQuantizable",                                                \
                              [](const NodeAttrs& attrs) { return QuantizeType::kSupport; }) \
      .add_argument("data", "NDArray-or-Symbol", "Array to be transposed.")                  \
      .add_argument("min_data",                                                              \
                    "NDArray-or-Symbol",                                                     \
                    "The minimum scalar value "                                              \
                    "possibly produced for the data")                                        \
      .add_argument("max_data",                                                              \
                    "NDArray-or-Symbol",                                                     \
                    "The maximum scalar value "                                              \
                    "possibly produced for the data")
95+
96+
// numpy flavour: axes follow NumpyTransposeParam semantics.
MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(_npx_quantized_transpose)
    .set_attr_parser(ParamParser<NumpyTransposeParam>)
    .set_attr<mxnet::FInferShape>("FInferShape", QuantizedTransposeShape<NumpyTransposeShape>)
    .add_arguments(NumpyTransposeParam::__FIELDS__());

// classic flavour: axes follow TransposeParam semantics.
MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(_contrib_quantized_transpose)
    .add_alias("quantized_transpose")
    .set_attr_parser(ParamParser<TransposeParam>)
    .set_attr<mxnet::FInferShape>("FInferShape", QuantizedTransposeShape<TransposeShape>)
    .add_arguments(TransposeParam::__FIELDS__());
106+
107+
NNVM_REGISTER_OP(transpose).set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
108+
nnvm::ObjectPtr node = nnvm::Node::Create();
109+
node->attrs.op = Op::Get("_contrib_quantized_transpose");
110+
node->attrs.name = "quantized_" + attrs.name;
111+
node->attrs.dict = attrs.dict;
112+
if (node->op()->attr_parser != nullptr) {
113+
node->op()->attr_parser(&(node->attrs));
114+
}
115+
return node;
116+
});
117+
118+
NNVM_REGISTER_OP(_npi_transpose).set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
119+
nnvm::ObjectPtr node = nnvm::Node::Create();
120+
node->attrs.op = Op::Get("_npx_quantized_transpose");
121+
node->attrs.name = "quantized_" + attrs.name;
122+
node->attrs.dict = attrs.dict;
123+
if (node->op()->attr_parser != nullptr) {
124+
node->op()->attr_parser(&(node->attrs));
125+
}
126+
return node;
127+
});
128+
129+
} // namespace op
130+
} // namespace mxnet

0 commit comments

Comments
 (0)