Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 54e122c

Browse files
authored
quantized transpose operator (#20817)
* add quantized transpose
* add quantized transpose test
* add license to new files
* check support, fix warnings
* remove inplace quantized transpose
* fix inplace quantized transpose
* add quantized transpose test
* separate transpose ops
* review fixes
* add fallbacks for NP and ND
* fix formatting
* fix formatting
* update np transpose test
* add operators to amp lists
* add macro operator creation
* add tests
* review fix
* remove relative includes
1 parent 6c2b3dc commit 54e122c

File tree

9 files changed

+367
-60
lines changed

9 files changed

+367
-60
lines changed

python/mxnet/amp/lists/symbol_fp16.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@
108108
'_contrib_quantized_elemwise_add',
109109
'_contrib_quantized_act',
110110
'_contrib_quantized_reshape',
111+
'_contrib_quantized_transpose',
111112
'_npx_quantized_reshape',
113+
'_npx_quantized_transpose',
112114
'_image_crop',
113115
'_linspace',
114116
'_contrib_requantize',

src/operator/nn/dnnl/dnnl_transpose.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ DNNLTransposeFwd::DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArr
7171
}
7272

7373
dnnl_memory_desc_t dst_fmt;
74-
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);
74+
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, get_dnnl_type_t(data.dtype()), strides);
7575

7676
dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
7777
out_ = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);

src/operator/numpy/np_matrix_op-inl.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <vector>
2828
#include <algorithm>
2929
#include <string>
30+
#include <set>
3031
#include <utility>
3132
#include <unordered_map>
3233
#include "../tensor/matrix_op-inl.h"
@@ -175,6 +176,66 @@ void NumpyTranspose(const nnvm::NodeAttrs& attrs,
175176
}
176177
}
177178

179+
/*!
 * \brief Bidirectional shape inference for numpy-style transpose.
 *
 * Deduces the output shape from a known input shape (`ret`) and,
 * symmetrically, the input shape from a known output shape (`get`),
 * honouring `param.axes` when given; with no axes, numpy semantics
 * reverse all axes.
 *
 * \param attrs     op attributes; `parsed` holds NumpyTransposeParam
 * \param in_attrs  one entry: data shape (may be partially unknown)
 * \param out_attrs one entry: output shape (may be partially unknown)
 * \return true once both input and output shapes are fully known
 */
inline bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
                                mxnet::ShapeVector* in_attrs,
                                mxnet::ShapeVector* out_attrs) {
  const NumpyTransposeParam& param = nnvm::get<NumpyTransposeParam>(attrs.parsed);
  CHECK_EQ(in_attrs->size(), 1U);
  CHECK_EQ(out_attrs->size(), 1U);
  mxnet::TShape& shp = (*in_attrs)[0];
  mxnet::TShape& out_shp = (*out_attrs)[0];

  // Pick the rank from whichever side is already known.
  int ndim = -1;
  if (ndim_is_known(shp)) {
    ndim = shp.ndim();
  } else if (ndim_is_known(out_shp)) {
    ndim = out_shp.ndim();
  }
  if (ndim < 0) {
    return false;  // neither side known yet — defer inference
  }
  if (out_shp.ndim() >= 0 && shp.ndim() >= 0) {
    CHECK_EQ(out_shp.ndim(), shp.ndim());
  }

  mxnet::TShape get(ndim, -1);  // input shape deduced from the output
  mxnet::TShape ret(ndim, -1);  // output shape deduced from the input

  if (ndim_is_known(param.axes)) {
    CHECK_EQ(ndim, param.axes.ndim())
        << "The number of axes does not match the dimension of the tensor. axes = " << param.axes
        << ", input tensor shape = " << shp;
    mxnet::TShape axes = common::CanonicalizeAxes(param.axes);
    // A std::set collapses duplicates, so a size mismatch means repeats.
    std::set<dim_t> axes_set(axes.begin(), axes.end());
    CHECK_EQ(axes_set.size(), axes.ndim()) << "ValueError: Repeated axis in transpose."
                                           << " param.axes = " << param.axes;
    if (ndim_is_known(shp)) {
      for (int i = 0; i < ndim; ++i) {
        ret[i] = shp[axes[i]];
      }
    }
    if (ndim_is_known(out_shp)) {
      for (int i = 0; i < ndim; ++i) {
        get[axes[i]] = out_shp[i];
      }
    }
  } else {
    // No axes given: reverse all axes (numpy default).
    if (ndim_is_known(shp)) {
      for (int i = 0; i < ndim; ++i) {
        ret[i] = shp[ndim - 1 - i];
      }
    }
    if (ndim_is_known(out_shp)) {
      for (int i = 0; i < ndim; ++i) {
        get[ndim - 1 - i] = out_shp[i];
      }
    }
  }
  SHAPE_ASSIGN_CHECK(*in_attrs, 0, get);
  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
  return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
}
238+
178239
template <typename xpu>
179240
void NumpyColumnStackForward(const nnvm::NodeAttrs& attrs,
180241
const OpContext& ctx,

src/operator/numpy/np_matrix_op.cc

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -45,65 +45,6 @@ DMLC_REGISTER_PARAMETER(NumpyDiagParam);
4545
DMLC_REGISTER_PARAMETER(NumpyDiagonalParam);
4646
DMLC_REGISTER_PARAMETER(NumpyDiagflatParam);
4747

48-
bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
49-
mxnet::ShapeVector* in_attrs,
50-
mxnet::ShapeVector* out_attrs) {
51-
const NumpyTransposeParam& param = nnvm::get<NumpyTransposeParam>(attrs.parsed);
52-
CHECK_EQ(in_attrs->size(), 1U);
53-
CHECK_EQ(out_attrs->size(), 1U);
54-
mxnet::TShape& shp = (*in_attrs)[0];
55-
mxnet::TShape& out_shp = (*out_attrs)[0];
56-
57-
int ndim = -1;
58-
if (ndim_is_known(shp)) {
59-
ndim = shp.ndim();
60-
} else if (ndim_is_known(out_shp)) {
61-
ndim = out_shp.ndim();
62-
}
63-
if (ndim < 0) {
64-
return false;
65-
}
66-
if (out_shp.ndim() >= 0 && shp.ndim() >= 0) {
67-
CHECK_EQ(out_shp.ndim(), shp.ndim());
68-
}
69-
70-
mxnet::TShape get(ndim, -1);
71-
mxnet::TShape ret(ndim, -1);
72-
73-
if (ndim_is_known(param.axes)) {
74-
CHECK_EQ(ndim, param.axes.ndim())
75-
<< "The number of axes does not match the dimension of the tensor. axes = " << param.axes
76-
<< ", input tensor shape = " << shp;
77-
mxnet::TShape axes = common::CanonicalizeAxes(param.axes);
78-
std::set<dim_t> axes_set(axes.begin(), axes.end());
79-
CHECK_EQ(axes_set.size(), axes.ndim()) << "ValueError: Repeated axis in transpose."
80-
<< " param.axes = " << param.axes;
81-
if (ndim_is_known(shp)) {
82-
for (int i = 0; i < ndim; ++i) {
83-
ret[i] = shp[axes[i]];
84-
}
85-
}
86-
if (ndim_is_known(out_shp)) {
87-
for (int i = 0; i < ndim; ++i) {
88-
get[axes[i]] = out_shp[i];
89-
}
90-
}
91-
} else {
92-
if (ndim_is_known(shp)) {
93-
for (int i = 0; i < ndim; ++i) {
94-
ret[i] = shp[ndim - 1 - i];
95-
}
96-
}
97-
if (ndim_is_known(out_shp)) {
98-
for (int i = 0; i < ndim; ++i) {
99-
get[ndim - 1 - i] = out_shp[i];
100-
}
101-
}
102-
}
103-
SHAPE_ASSIGN_CHECK(*in_attrs, 0, get);
104-
SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
105-
return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
106-
}
10748
#if MXNET_USE_ONEDNN == 1
10849

10950
static void NumpyTransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
2+
/*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*/
20+
21+
/*!
22+
* \file dnnl_quantized_transpose.cc
23+
* \author: Rafal Litka, rafal.litka@intel.com
24+
*/
25+
#if MXNET_USE_ONEDNN == 1
26+
#include "operator/numpy/np_matrix_op-inl.h"
27+
#include "operator/tensor/matrix_op-inl.h"
28+
#include "operator/nn/dnnl/dnnl_transpose-inl.h"
29+
30+
namespace mxnet {
31+
namespace op {
32+
33+
/*!
 * \brief Storage-type inference for quantized transpose (DNNL path).
 *
 * Both sides carry three entries (data, min, max); the actual dispatch
 * decision is delegated to the common DNNLStorageType helper.
 */
inline static bool QuantizedTransposeStorageType(const nnvm::NodeAttrs& attrs,
                                                 const int dev_mask,
                                                 DispatchMode* dispatch_mode,
                                                 std::vector<int>* in_attrs,
                                                 std::vector<int>* out_attrs) {
  CHECK_EQ(in_attrs->size(), 3U);
  CHECK_EQ(out_attrs->size(), 3U);
  return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
42+
43+
bool SupportDNNLQuantizedTranspose(const NDArray& data) {
44+
auto data_ndim = data.shape().ndim();
45+
46+
if (data_ndim > 4 || data_ndim == 0 || data.shape().Size() == 0)
47+
return false;
48+
49+
return true;
50+
}
51+
// Function-pointer type of a TBlob-based transpose implementation, used as the
// CPU fallback when DNNL cannot handle the input.
typedef void (*TransposeFallbackFunAny)(const nnvm::NodeAttrs&,
                                        const OpContext&,
                                        const std::vector<TBlob>&,
                                        const std::vector<OpReqType>&,
                                        const std::vector<TBlob>&);

/*!
 * \brief Forward computation for quantized transpose.
 *
 * inputs  = {data, min_data, max_data}; outputs = {output, min_output, max_output}.
 * Dispatches the data tensor to the DNNL primitive when supported, otherwise
 * to the plain TBlob fallback. The min/max scalars are copied through
 * unchanged — transposing does not alter the value range.
 *
 * \tparam ParamType         TransposeParam or NumpyTransposeParam
 * \tparam TransposeFallback TBlob-based fallback implementation
 */
template <class ParamType, TransposeFallbackFunAny TransposeFallback>
static void DNNLQuantizedTransposeForward(const nnvm::NodeAttrs& attrs,
                                          const OpContext& ctx,
                                          const std::vector<NDArray>& inputs,
                                          const std::vector<OpReqType>& req,
                                          const std::vector<NDArray>& outputs) {
  CHECK(inputs[0].dtype() == mshadow::kUint8 || inputs[0].dtype() == mshadow::kInt8)
      << "dnnl_quantized_transpose only supports uint8 and int8 as input type";
  if (req[0] == kNullOp) {
    return;  // nothing requested for the data output — skip all work
  }
  CHECK_EQ(inputs.size(), 3U);
  CHECK_EQ(outputs.size(), 3U);
  if (SupportDNNLQuantizedTranspose(inputs[0])) {
    DNNLRun(DNNLTransposeForward<ParamType>, attrs, ctx, inputs[0], req[0], outputs[0]);
  } else {
    FallBackCompute(TransposeFallback, attrs, ctx, inputs, req, outputs);
  }
  // Forward the quantization range scalars as-is.
  outputs[1].data().dptr<float>()[0] = inputs[1].data().dptr<float>()[0];
  outputs[2].data().dptr<float>()[0] = inputs[2].data().dptr<float>()[0];
}
78+
79+
// DNNL-backed attribute registrations for both transpose flavours. Each
// requests temp space (needed by the fallback path) and is flagged TIsDNNL.
NNVM_REGISTER_OP(_npx_quantized_transpose)
    .set_attr<FInferStorageType>("FInferStorageType", QuantizedTransposeStorageType)
    .set_attr<FResourceRequest>("FResourceRequest",
                                [](const NodeAttrs& n) {
                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
                                })
    .set_attr<FComputeEx>("FComputeEx<cpu>",
                          DNNLQuantizedTransposeForward<NumpyTransposeParam, NumpyTranspose<cpu>>)
    .set_attr<bool>("TIsDNNL", true);

NNVM_REGISTER_OP(_contrib_quantized_transpose)
    .set_attr<FInferStorageType>("FInferStorageType", QuantizedTransposeStorageType)
    .set_attr<FResourceRequest>("FResourceRequest",
                                [](const NodeAttrs& n) {
                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
                                })
    .set_attr<FComputeEx>("FComputeEx<cpu>",
                          DNNLQuantizedTransposeForward<TransposeParam, Transpose<cpu>>)
    .set_attr<bool>("TIsDNNL", true);
98+
99+
} // namespace op
100+
} // namespace mxnet
101+
102+
#endif // MXNET_USE_ONEDNN == 1
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
/*!
21+
* \file quantized_transpose.cc
22+
* \author: Rafal Litka, rafal.litka@intel.com
23+
*/
24+
#include <mxnet/op_attr_types.h>
25+
#include "../tensor/matrix_op-inl.h"
26+
#include "../numpy/np_matrix_op-inl.h"
27+
28+
namespace mxnet {
29+
namespace op {
30+
31+
/*!
 * \brief Type inference for quantized transpose.
 *
 * Slot 0 is the quantized payload (its dtype is propagated input -> output);
 * slots 1 and 2 are the fp32 min/max scalars on both sides.
 * \return true once the payload dtype is known.
 */
inline bool QuantizedTransposeType(const nnvm::NodeAttrs& attrs,
                                   std::vector<int>* in_attrs,
                                   std::vector<int>* out_attrs) {
  CHECK_EQ(in_attrs->size(), 3U);
  CHECK_EQ(out_attrs->size(), 3U);
  // min/max inputs and outputs are always fp32 scalars.
  TYPE_ASSIGN_CHECK(*in_attrs, 1, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*in_attrs, 2, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::kFloat32);
  TYPE_ASSIGN_CHECK(*out_attrs, 2, mshadow::kFloat32);
  // The transposed output keeps the input's quantized dtype.
  TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
  return (*in_attrs)[0] != -1;
}
43+
44+
typedef bool (*TransposeShapeFunAny)(const nnvm::NodeAttrs&,
45+
mxnet::ShapeVector*,
46+
mxnet::ShapeVector*);
47+
48+
template <TransposeShapeFunAny TransposeShapeFun>
49+
inline bool QuantizedTransposeShape(const nnvm::NodeAttrs& attrs,
50+
mxnet::ShapeVector* in_attrs,
51+
mxnet::ShapeVector* out_attrs) {
52+
CHECK_EQ(in_attrs->size(), 3U);
53+
CHECK_EQ(out_attrs->size(), 3U);
54+
mxnet::ShapeVector qin_attrs(1);
55+
mxnet::ShapeVector qout_attrs(1);
56+
SHAPE_ASSIGN_CHECK(qin_attrs, 0, (*in_attrs)[0]);
57+
SHAPE_ASSIGN_CHECK(qout_attrs, 0, (*out_attrs)[0]);
58+
bool ret = TransposeShapeFun(attrs, &qin_attrs, &qout_attrs);
59+
SHAPE_ASSIGN_CHECK(*in_attrs, 0, qin_attrs[0]);
60+
SHAPE_ASSIGN_CHECK(*out_attrs, 0, qout_attrs[0]);
61+
SHAPE_ASSIGN_CHECK(*in_attrs, 1, mxnet::TShape{1});
62+
SHAPE_ASSIGN_CHECK(*in_attrs, 2, mxnet::TShape{1});
63+
SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
64+
SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
65+
return ret;
66+
}
67+
68+
// Registers a quantized transpose operator with the I/O conventions shared by
// the classic and numpy flavours:
//   inputs  : data, min_data, max_data
//   outputs : output, min_output, max_output
// Flavour-specific bits (param parser, shape function) are appended by the
// caller after the macro invocation.
#define MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(name)                                    \
  NNVM_REGISTER_OP(name)                                                                     \
      .set_num_inputs(3)                                                                     \
      .set_num_outputs(3)                                                                    \
      .set_attr<nnvm::FInferType>("FInferType", QuantizedTransposeType)                      \
      .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)                             \
      .set_attr<nnvm::FListInputNames>(                                                      \
          "FListInputNames",                                                                 \
          [](const NodeAttrs& attrs) {                                                       \
            return std::vector<std::string>{"data", "min_data", "max_data"};                 \
          })                                                                                 \
      .set_attr<nnvm::FListOutputNames>(                                                     \
          "FListOutputNames",                                                                \
          [](const NodeAttrs& attrs) {                                                       \
            return std::vector<std::string>{"output", "min_output", "max_output"};           \
          })                                                                                 \
      .set_attr<FQuantizable>("FQuantizable",                                                \
                              [](const NodeAttrs& attrs) { return QuantizeType::kSupport; }) \
      .add_argument("data", "NDArray-or-Symbol", "Array to be transposed.")                  \
      .add_argument("min_data",                                                              \
                    "NDArray-or-Symbol",                                                     \
                    "The minimum scalar value "                                              \
                    "possibly produced for the data")                                        \
      .add_argument("max_data",                                                              \
                    "NDArray-or-Symbol",                                                     \
                    "The maximum scalar value "                                              \
                    "possibly produced for the data")
95+
96+
// numpy flavour: axes follow NumpyTransposeParam semantics.
MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(_npx_quantized_transpose)
    .set_attr_parser(ParamParser<NumpyTransposeParam>)
    .set_attr<mxnet::FInferShape>("FInferShape", QuantizedTransposeShape<NumpyTransposeShape>)
    .add_arguments(NumpyTransposeParam::__FIELDS__());

// classic flavour: axes follow TransposeParam semantics.
MXNET_OPERATOR_REGISTER_QUANTIZED_TRANSPOSE(_contrib_quantized_transpose)
    .add_alias("quantized_transpose")
    .set_attr_parser(ParamParser<TransposeParam>)
    .set_attr<mxnet::FInferShape>("FInferShape", QuantizedTransposeShape<TransposeShape>)
    .add_arguments(TransposeParam::__FIELDS__());
106+
107+
NNVM_REGISTER_OP(transpose).set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
108+
nnvm::ObjectPtr node = nnvm::Node::Create();
109+
node->attrs.op = Op::Get("_contrib_quantized_transpose");
110+
node->attrs.name = "quantized_" + attrs.name;
111+
node->attrs.dict = attrs.dict;
112+
if (node->op()->attr_parser != nullptr) {
113+
node->op()->attr_parser(&(node->attrs));
114+
}
115+
return node;
116+
});
117+
118+
NNVM_REGISTER_OP(_npi_transpose).set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
119+
nnvm::ObjectPtr node = nnvm::Node::Create();
120+
node->attrs.op = Op::Get("_npx_quantized_transpose");
121+
node->attrs.name = "quantized_" + attrs.name;
122+
node->attrs.dict = attrs.dict;
123+
if (node->op()->attr_parser != nullptr) {
124+
node->op()->attr_parser(&(node->attrs));
125+
}
126+
return node;
127+
});
128+
129+
} // namespace op
130+
} // namespace mxnet

0 commit comments

Comments
 (0)