Skip to content

Commit 17a2003

Browse files
[Inference TRT] elementwise layer support (#43851)
* elementwise support * commit
1 parent ff70a26 commit 17a2003

File tree

3 files changed

+427
-293
lines changed

3 files changed

+427
-293
lines changed

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

Lines changed: 94 additions & 219 deletions
Original file line numberDiff line numberDiff line change
@@ -19,236 +19,115 @@ namespace paddle {
1919
namespace inference {
2020
namespace tensorrt {
2121

22-
static bool CheckDims(const nvinfer1::Dims& dims_x,
23-
const nvinfer1::Dims& dims_y) {
24-
if (dims_x.nbDims != dims_y.nbDims) {
25-
return false;
26-
}
27-
for (int i = 0; i < dims_x.nbDims; i++) {
28-
if (dims_x.d[i] != dims_y.d[i]) {
29-
return false;
30-
}
31-
}
32-
return true;
33-
}
34-
35-
class ElementwiseWeightOpConverter : public OpConverter {
22+
class ElementwiseTensorOpConverter : public OpConverter {
3623
public:
37-
ElementwiseWeightOpConverter() {}
24+
ElementwiseTensorOpConverter() {}
3825
void operator()(const framework::proto::OpDesc& op,
39-
const framework::Scope& scope, bool test_mode) override {
40-
// Here the two nullptr looks strange, that's because the
41-
// framework::OpDesc's constructor is strange.
42-
nvinfer1::ILayer* layer = nullptr;
26+
const framework::Scope& scope,
27+
bool test_mode) override {
28+
VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
4329
framework::OpDesc op_desc(op, nullptr);
44-
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
45-
4630
auto* X = engine_->GetITensor(op_desc.Input("X").front());
31+
nvinfer1::ITensor* Y = nullptr;
4732
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
48-
PADDLE_ENFORCE_NOT_NULL(
49-
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
50-
op_desc.Input("Y").front().c_str()));
51-
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
52-
float* weight_data = nullptr;
53-
auto output_name = op_desc.Output("Out")[0];
54-
weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
55-
nvinfer1::Dims dims_x = X->getDimensions();
56-
57-
auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
58-
TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
59-
static_cast<void*>(weight_data),
60-
static_cast<size_t>(Y_t->numel())};
61-
TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
62-
0};
63-
TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
64-
0};
65-
66-
nvinfer1::IShuffleLayer* expand_layer = nullptr;
67-
nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
68-
int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
69-
auto input_dim = X->getDimensions();
70-
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
71-
nvinfer1::Dims expand_shape;
72-
expand_shape.nbDims = 3 + dynamic_shape_offset;
73-
for (int i = 0; i < expand_shape.nbDims; i++) {
74-
if (i < input_dim.nbDims) {
75-
expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
76-
} else {
77-
expand_shape.d[i] = 1;
78-
}
79-
}
80-
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
81-
expand_layer->setReshapeDimensions(expand_shape);
82-
X = expand_layer->getOutput(0);
83-
expand_layer->getOutput(0)->setName(
84-
("elementwise_reshape_out: " + output_name).c_str());
85-
expand_layer->setName(
86-
("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
87-
}
88-
if (op_type_ == "add") {
89-
nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
90-
engine_, ScaleNd, *X, scale_mode, shift_weights.get(),
91-
scale_weights.get(), power_weights.get(), dynamic_shape_offset);
92-
layer = scale_layer;
93-
} else if (op_type_ == "mul") {
94-
nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
95-
engine_, Scale, *X, scale_mode, scale_weights.get(),
96-
shift_weights.get(), power_weights.get());
97-
layer = scale_layer;
98-
}
99-
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
100-
nvinfer1::Dims squeeze_shape;
101-
squeeze_shape.nbDims = input_dim.nbDims;
102-
for (int i = 0; i < squeeze_shape.nbDims; i++) {
103-
squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
104-
}
105-
squeeze_layer =
106-
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
107-
squeeze_layer->setReshapeDimensions(squeeze_shape);
108-
RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
109-
{output_name}, test_mode);
110-
} else {
111-
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
112-
{output_name}, test_mode);
113-
}
114-
};
115-
116-
if (engine_->with_dynamic_shape()) {
117-
if (Y_t->dims().size() == 1) {
118-
auto scale_mode = nvinfer1::ScaleMode::kCHANNEL;
119-
PADDLE_ENFORCE_EQ(Y_t->dims()[0], dims_x.d[1],
120-
platform::errors::InvalidArgument(
121-
"The Bias's size(%d) should be equal to the "
122-
"first dim(%d) of the Input.",
123-
Y_t->dims()[0], dims_x.d[1]));
124-
regist_eltwise_weight(scale_mode);
125-
} else {
126-
PADDLE_THROW(platform::errors::InvalidArgument(
127-
"The size of input bias's dims is %d, but TensorRT dynamic shape "
128-
"only support size = 1 for Elementwise op!",
129-
Y_t->dims().size()));
33+
if (Y_v) {
34+
// Y is weight
35+
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
36+
float* weight_data =
37+
engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
38+
std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
39+
TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
40+
static_cast<void*>(weight_data),
41+
static_cast<size_t>(Y_t->numel())};
42+
nvinfer1::Dims trt_dims_y;
43+
trt_dims_y.nbDims = dims_y.size();
44+
for (int i = 0; i < trt_dims_y.nbDims; i++) {
45+
trt_dims_y.d[i] = dims_y[i];
13046
}
131-
return;
47+
Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
48+
->getOutput(0);
49+
} else {
50+
Y = engine_->GetITensor(op_desc.Input("Y").front());
13251
}
13352

134-
std::vector<int> no_batch_dims;
135-
int start_index = 0;
136-
137-
for (; start_index < dims_x.nbDims; start_index++)
138-
no_batch_dims.push_back(dims_x.d[start_index]);
139-
140-
auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
53+
if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
54+
auto* tmp = X;
55+
X = Y;
56+
Y = tmp;
57+
}
58+
nvinfer1::Dims dims_x = X->getDimensions();
59+
nvinfer1::Dims dims_y = Y->getDimensions();
60+
auto output_name = op_desc.Output("Out")[0];
14161

142-
std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
143-
if (dims_y.size() == no_batch_dims.size() + 1) {
144-
if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
62+
// axis here is relative to explicit batch
63+
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
64+
int real_x_rank = dims_x.nbDims;
65+
int real_y_rank = dims_y.nbDims;
66+
if (!engine_->with_dynamic_shape()) {
67+
real_x_rank++;
68+
real_y_rank++;
69+
if (Y_v) real_y_rank--;
70+
}
71+
if (axis == -1) {
72+
axis = real_x_rank - real_y_rank;
73+
}
74+
if (!engine_->with_dynamic_shape() && axis > 0) {
75+
axis--;
14576
}
14677

147-
if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) {
148-
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
149-
} else if (dims_y.size() == no_batch_dims.size() &&
150-
dims_y[0] == no_batch_dims[0]) {
151-
scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
152-
for (size_t i = 1; i < no_batch_dims.size(); i++) {
153-
if (dims_y[i] != no_batch_dims[i]) {
154-
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
155-
break;
78+
// X: - - - - - - -
79+
// axis
80+
// Y: - - -
81+
// we need to expand Y's rank so that it equals X's rank
82+
int left_one_num = axis;
83+
int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
84+
nvinfer1::IShuffleLayer* reshape_layer;
85+
nvinfer1::ITensor* reshape_y_tensor;
86+
if (left_one_num > 0 || right_one_num > 0) {
87+
if (engine_->with_dynamic_shape()) {
88+
auto* y_shape_tensor = Shape(Y);
89+
auto* new_y_shape_tensor = y_shape_tensor;
90+
if (axis > 0) {
91+
std::vector<int32_t> left_one(left_one_num, 1);
92+
auto* left_one_tensor = Add1DConstantLayer(left_one);
93+
new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
94+
left_one_tensor, new_y_shape_tensor});
15695
}
157-
}
158-
if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
159-
for (size_t i = 1; i < no_batch_dims.size(); i++) {
160-
if (dims_y[i] != 1)
161-
PADDLE_THROW(platform::errors::InvalidArgument(
162-
"The bias's %d dim is %d, but TensorRT dynamic shape only "
163-
"support it equals to 1 for Elementwise op!",
164-
i, dims_y[i]));
96+
if (right_one_num > 0) {
97+
std::vector<int32_t> right_one(right_one_num, 1);
98+
auto* right_one_tensor = Add1DConstantLayer(right_one);
99+
new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
100+
new_y_shape_tensor, right_one_tensor});
165101
}
166-
}
167-
} else {
168-
if (dims_y.size() >= 1) {
169-
PADDLE_THROW(platform::errors::InvalidArgument(
170-
"The size of bias's dims is %d and bias's size is %d. TensorRT "
171-
"doesn't support this shape for Elementwise op!",
172-
dims_y.size(), dims_y[0]));
102+
reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
103+
reshape_layer->setInput(1, *new_y_shape_tensor);
173104
} else {
174-
PADDLE_THROW(platform::errors::InvalidArgument(
175-
"The size of bias's dims is %d. TensorRT doesn't support "
176-
"this shape for Elementwise op!",
177-
dims_y.size()));
105+
nvinfer1::Dims new_y_dims;
106+
new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
107+
for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
108+
for (int i = 0; i < dims_y.nbDims; i++)
109+
new_y_dims.d[left_one_num + i] = dims_y.d[i];
110+
reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
111+
reshape_layer->setReshapeDimensions(new_y_dims);
178112
}
113+
reshape_y_tensor = reshape_layer->getOutput(0);
114+
} else {
115+
// In fact, we could remove this `else`, but then the rt_resnet50_test CI in trt
116+
// 6015 fails, how ridiculous!
117+
reshape_y_tensor = Y;
179118
}
180-
regist_eltwise_weight(scale_mode);
181-
}
182-
183-
protected:
184-
std::string op_type_;
185-
};
186119

187-
class ElementwiseTensorOpConverter : public OpConverter {
188-
public:
189-
ElementwiseTensorOpConverter() {}
190-
void operator()(const framework::proto::OpDesc& op,
191-
const framework::Scope& scope, bool test_mode) override {
192120
auto op_pair = ops.find(op_type_);
193-
PADDLE_ENFORCE_NE(op_pair, ops.end(),
121+
PADDLE_ENFORCE_NE(op_pair,
122+
ops.end(),
194123
platform::errors::InvalidArgument(
195124
"Elementwise op's type(%s) is not supported. Please "
196125
"check if the op_type is correct.",
197126
op_type_));
198127

199-
// Here the two nullptr looks strange, that's because the
200-
// framework::OpDesc's constructor is strange.
201-
framework::OpDesc op_desc(op, nullptr);
202-
nvinfer1::ILayer* layer = nullptr;
203-
204-
auto* X = engine_->GetITensor(op_desc.Input("X").front());
205-
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
206-
std::vector<nvinfer1::ITensor*> itensors;
207-
itensors.push_back(X);
208-
itensors.push_back(Y);
209-
nvinfer1::Dims dims_x = X->getDimensions();
210-
nvinfer1::Dims dims_y = Y->getDimensions();
211-
212-
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
213-
auto output_name = op_desc.Output("Out")[0];
214-
215-
auto common_func = [&](nvinfer1::ILayer* layer) {
216-
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
217-
};
218-
219-
if (dims_x.nbDims == dims_y.nbDims) {
220-
// The two input tensor should have the same dims
221-
VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
222-
nvinfer1::IElementWiseLayer* elet_layer =
223-
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second);
224-
225-
layer = elet_layer;
226-
} else {
227-
VLOG(3) << "Convert a fluid elementwise op to TensorRT "
228-
"ElementWisePluginLayer";
229-
if (engine_->with_dynamic_shape()) {
230-
#if IS_TRT_VERSION_GE(6000)
231-
plugin::ElementwisePluginDynamic* plugin =
232-
new plugin::ElementwisePluginDynamic(op_type_, axis);
233-
layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin);
234-
#else
235-
PADDLE_THROW(platform::errors::Fatal(
236-
"You are running the TRT Dynamic Shape mode, need to confirm that "
237-
"your TRT version is no less than 6.0"));
238-
#endif
239-
} else {
240-
plugin::ElementWisePlugin* plugin =
241-
new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis);
242-
243-
std::vector<nvinfer1::ITensor*> inputs{X, Y};
244-
auto* plugin_layer = engine_->AddPlugin(
245-
inputs.data(), inputs.size(),
246-
reinterpret_cast<plugin::PluginTensorRT*>(plugin));
247-
248-
layer = plugin_layer;
249-
}
250-
}
251-
common_func(layer);
128+
auto* layer = TRT_ENGINE_ADD_LAYER(
129+
engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
130+
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
252131
}
253132

254133
protected:
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
268147
{"max", nvinfer1::ElementWiseOperation::kMAX},
269148
};
270149

271-
class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
272-
public:
273-
ElementwiseWeightAddOpConverter() { op_type_ = "add"; }
274-
};
275-
276-
class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
277-
public:
278-
ElementwiseWeightMulOpConverter() { op_type_ = "mul"; }
279-
};
280-
281150
class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
282151
public:
283152
ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
318187
} // namespace paddle
319188

320189
REGISTER_TRT_OP_CONVERTER(elementwise_add_weight,
321-
ElementwiseWeightAddOpConverter);
190+
ElementwiseTensorAddOpConverter);
322191
REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight,
323-
ElementwiseWeightMulOpConverter);
192+
ElementwiseTensorMulOpConverter);
193+
REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight,
194+
ElementwiseTensorSubOpConverter);
195+
REGISTER_TRT_OP_CONVERTER(elementwise_div_weight,
196+
ElementwiseTensorDivOpConverter);
197+
REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight,
198+
ElementwiseTensorPowOpConverter);
324199

325200
REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
326201
ElementwiseTensorAddOpConverter);

0 commit comments

Comments
 (0)