@@ -19,236 +19,115 @@ namespace paddle {
19
19
namespace inference {
20
20
namespace tensorrt {
21
21
22
- static bool CheckDims (const nvinfer1::Dims& dims_x,
23
- const nvinfer1::Dims& dims_y) {
24
- if (dims_x.nbDims != dims_y.nbDims ) {
25
- return false ;
26
- }
27
- for (int i = 0 ; i < dims_x.nbDims ; i++) {
28
- if (dims_x.d [i] != dims_y.d [i]) {
29
- return false ;
30
- }
31
- }
32
- return true ;
33
- }
34
-
35
- class ElementwiseWeightOpConverter : public OpConverter {
22
+ class ElementwiseTensorOpConverter : public OpConverter {
36
23
public:
37
- ElementwiseWeightOpConverter () {}
24
+ ElementwiseTensorOpConverter () {}
38
25
void operator ()(const framework::proto::OpDesc& op,
39
- const framework::Scope& scope, bool test_mode) override {
40
- // Here the two nullptr looks strange, that's because the
41
- // framework::OpDesc's constructor is strange.
42
- nvinfer1::ILayer* layer = nullptr ;
26
+ const framework::Scope& scope,
27
+ bool test_mode) override {
28
+ VLOG (3 ) << " Convert a fluid elementwise op to TensorRT IElementWiseLayer" ;
43
29
framework::OpDesc op_desc (op, nullptr );
44
- VLOG (3 ) << " Convert a fluid elementwise op to TensorRT IScaleLayer" ;
45
-
46
30
auto * X = engine_->GetITensor (op_desc.Input (" X" ).front ());
31
+ nvinfer1::ITensor* Y = nullptr ;
47
32
auto * Y_v = scope.FindVar (op_desc.Input (" Y" ).front ());
48
- PADDLE_ENFORCE_NOT_NULL (
49
- Y_v, platform::errors::NotFound (" Variable %s not found in scope." ,
50
- op_desc.Input (" Y" ).front ().c_str ()));
51
- auto * Y_t = Y_v->GetMutable <framework::LoDTensor>();
52
- float * weight_data = nullptr ;
53
- auto output_name = op_desc.Output (" Out" )[0 ];
54
- weight_data = engine_->GetWeightCPUData (op_desc.Input (" Y" ).front (), Y_t);
55
- nvinfer1::Dims dims_x = X->getDimensions ();
56
-
57
- auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
58
- TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT ,
59
- static_cast <void *>(weight_data),
60
- static_cast <size_t >(Y_t->numel ())};
61
- TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT , nullptr ,
62
- 0 };
63
- TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT , nullptr ,
64
- 0 };
65
-
66
- nvinfer1::IShuffleLayer* expand_layer = nullptr ;
67
- nvinfer1::IShuffleLayer* squeeze_layer = nullptr ;
68
- int dynamic_shape_offset = engine_->with_dynamic_shape () ? 1 : 0 ;
69
- auto input_dim = X->getDimensions ();
70
- if (input_dim.nbDims < 3 + dynamic_shape_offset) {
71
- nvinfer1::Dims expand_shape;
72
- expand_shape.nbDims = 3 + dynamic_shape_offset;
73
- for (int i = 0 ; i < expand_shape.nbDims ; i++) {
74
- if (i < input_dim.nbDims ) {
75
- expand_shape.d [i] = input_dim.d [i] < 0 ? 0 : input_dim.d [i];
76
- } else {
77
- expand_shape.d [i] = 1 ;
78
- }
79
- }
80
- expand_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *X);
81
- expand_layer->setReshapeDimensions (expand_shape);
82
- X = expand_layer->getOutput (0 );
83
- expand_layer->getOutput (0 )->setName (
84
- (" elementwise_reshape_out: " + output_name).c_str ());
85
- expand_layer->setName (
86
- (" Elewise: Shuffle: (Output: " + output_name + " )" ).c_str ());
87
- }
88
- if (op_type_ == " add" ) {
89
- nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER (
90
- engine_, ScaleNd, *X, scale_mode, shift_weights.get (),
91
- scale_weights.get (), power_weights.get (), dynamic_shape_offset);
92
- layer = scale_layer;
93
- } else if (op_type_ == " mul" ) {
94
- nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER (
95
- engine_, Scale, *X, scale_mode, scale_weights.get (),
96
- shift_weights.get (), power_weights.get ());
97
- layer = scale_layer;
98
- }
99
- if (input_dim.nbDims < 3 + dynamic_shape_offset) {
100
- nvinfer1::Dims squeeze_shape;
101
- squeeze_shape.nbDims = input_dim.nbDims ;
102
- for (int i = 0 ; i < squeeze_shape.nbDims ; i++) {
103
- squeeze_shape.d [i] = input_dim.d [i] < 0 ? 0 : input_dim.d [i];
104
- }
105
- squeeze_layer =
106
- TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *(layer->getOutput (0 )));
107
- squeeze_layer->setReshapeDimensions (squeeze_shape);
108
- RreplenishLayerAndOutput (squeeze_layer, " elementwise_" + op_type_,
109
- {output_name}, test_mode);
110
- } else {
111
- RreplenishLayerAndOutput (layer, " elementwise_" + op_type_,
112
- {output_name}, test_mode);
113
- }
114
- };
115
-
116
- if (engine_->with_dynamic_shape ()) {
117
- if (Y_t->dims ().size () == 1 ) {
118
- auto scale_mode = nvinfer1::ScaleMode::kCHANNEL ;
119
- PADDLE_ENFORCE_EQ (Y_t->dims ()[0 ], dims_x.d [1 ],
120
- platform::errors::InvalidArgument (
121
- " The Bias's size(%d) should be equal to the "
122
- " first dim(%d) of the Input." ,
123
- Y_t->dims ()[0 ], dims_x.d [1 ]));
124
- regist_eltwise_weight (scale_mode);
125
- } else {
126
- PADDLE_THROW (platform::errors::InvalidArgument (
127
- " The size of input bias's dims is %d, but TensorRT dynamic shape "
128
- " only support size = 1 for Elementwise op!" ,
129
- Y_t->dims ().size ()));
33
+ if (Y_v) {
34
+ // Y is weight
35
+ auto * Y_t = Y_v->GetMutable <framework::LoDTensor>();
36
+ float * weight_data =
37
+ engine_->GetWeightCPUData (op_desc.Input (" Y" ).front (), Y_t);
38
+ std::vector<int > dims_y = phi::vectorize<int >(Y_t->dims ());
39
+ TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT ,
40
+ static_cast <void *>(weight_data),
41
+ static_cast <size_t >(Y_t->numel ())};
42
+ nvinfer1::Dims trt_dims_y;
43
+ trt_dims_y.nbDims = dims_y.size ();
44
+ for (int i = 0 ; i < trt_dims_y.nbDims ; i++) {
45
+ trt_dims_y.d [i] = dims_y[i];
130
46
}
131
- return ;
47
+ Y = TRT_ENGINE_ADD_LAYER (engine_, Constant, trt_dims_y, y_weight.get ())
48
+ ->getOutput (0 );
49
+ } else {
50
+ Y = engine_->GetITensor (op_desc.Input (" Y" ).front ());
132
51
}
133
52
134
- std::vector<int > no_batch_dims;
135
- int start_index = 0 ;
136
-
137
- for (; start_index < dims_x.nbDims ; start_index++)
138
- no_batch_dims.push_back (dims_x.d [start_index]);
139
-
140
- auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE ;
53
+ if (X->getDimensions ().nbDims < Y->getDimensions ().nbDims ) {
54
+ auto * tmp = X;
55
+ X = Y;
56
+ Y = tmp;
57
+ }
58
+ nvinfer1::Dims dims_x = X->getDimensions ();
59
+ nvinfer1::Dims dims_y = Y->getDimensions ();
60
+ auto output_name = op_desc.Output (" Out" )[0 ];
141
61
142
- std::vector<int > dims_y = phi::vectorize<int >(Y_t->dims ());
143
- if (dims_y.size () == no_batch_dims.size () + 1 ) {
144
- if (dims_y[0 ] == 1 ) dims_y.erase (dims_y.begin ());
62
+ // axis here is relative to explicit batch
63
+ int axis = BOOST_GET_CONST (int , op_desc.GetAttr (" axis" ));
64
+ int real_x_rank = dims_x.nbDims ;
65
+ int real_y_rank = dims_y.nbDims ;
66
+ if (!engine_->with_dynamic_shape ()) {
67
+ real_x_rank++;
68
+ real_y_rank++;
69
+ if (Y_v) real_y_rank--;
70
+ }
71
+ if (axis == -1 ) {
72
+ axis = real_x_rank - real_y_rank;
73
+ }
74
+ if (!engine_->with_dynamic_shape () && axis > 0 ) {
75
+ axis--;
145
76
}
146
77
147
- if (dims_y.size () == 1 && dims_y[0 ] == no_batch_dims[0 ]) {
148
- scale_mode = nvinfer1::ScaleMode::kCHANNEL ;
149
- } else if (dims_y.size () == no_batch_dims.size () &&
150
- dims_y[0 ] == no_batch_dims[0 ]) {
151
- scale_mode = nvinfer1::ScaleMode::kELEMENTWISE ;
152
- for (size_t i = 1 ; i < no_batch_dims.size (); i++) {
153
- if (dims_y[i] != no_batch_dims[i]) {
154
- scale_mode = nvinfer1::ScaleMode::kCHANNEL ;
155
- break ;
78
+ // X: - - - - - - -
79
+ // axis
80
+ // Y: - - -
81
+ // we need expand Y's rank = X's rank
82
+ int left_one_num = axis;
83
+ int right_one_num = dims_x.nbDims - axis - dims_y.nbDims ;
84
+ nvinfer1::IShuffleLayer* reshape_layer;
85
+ nvinfer1::ITensor* reshape_y_tensor;
86
+ if (left_one_num > 0 || right_one_num > 0 ) {
87
+ if (engine_->with_dynamic_shape ()) {
88
+ auto * y_shape_tensor = Shape (Y);
89
+ auto * new_y_shape_tensor = y_shape_tensor;
90
+ if (axis > 0 ) {
91
+ std::vector<int32_t > left_one (left_one_num, 1 );
92
+ auto * left_one_tensor = Add1DConstantLayer (left_one);
93
+ new_y_shape_tensor = Concat (std::vector<nvinfer1::ITensor*>{
94
+ left_one_tensor, new_y_shape_tensor});
156
95
}
157
- }
158
- if (scale_mode == nvinfer1::ScaleMode::kCHANNEL ) {
159
- for (size_t i = 1 ; i < no_batch_dims.size (); i++) {
160
- if (dims_y[i] != 1 )
161
- PADDLE_THROW (platform::errors::InvalidArgument (
162
- " The bias's %d dim is %d, but TensorRT dynamic shape only "
163
- " support it equals to 1 for Elementwise op!" ,
164
- i, dims_y[i]));
96
+ if (right_one_num > 0 ) {
97
+ std::vector<int32_t > right_one (right_one_num, 1 );
98
+ auto * right_one_tensor = Add1DConstantLayer (right_one);
99
+ new_y_shape_tensor = Concat (std::vector<nvinfer1::ITensor*>{
100
+ new_y_shape_tensor, right_one_tensor});
165
101
}
166
- }
167
- } else {
168
- if (dims_y.size () >= 1 ) {
169
- PADDLE_THROW (platform::errors::InvalidArgument (
170
- " The size of bias's dims is %d and bias's size is %d. TensorRT "
171
- " doesn't support this shape for Elementwise op!" ,
172
- dims_y.size (), dims_y[0 ]));
102
+ reshape_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *Y);
103
+ reshape_layer->setInput (1 , *new_y_shape_tensor);
173
104
} else {
174
- PADDLE_THROW (platform::errors::InvalidArgument (
175
- " The size of bias's dims is %d. TensorRT doesn't support "
176
- " this shape for Elementwise op!" ,
177
- dims_y.size ()));
105
+ nvinfer1::Dims new_y_dims;
106
+ new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
107
+ for (int i = 0 ; i < new_y_dims.nbDims ; i++) new_y_dims.d [i] = 1 ;
108
+ for (int i = 0 ; i < dims_y.nbDims ; i++)
109
+ new_y_dims.d [left_one_num + i] = dims_y.d [i];
110
+ reshape_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *Y);
111
+ reshape_layer->setReshapeDimensions (new_y_dims);
178
112
}
113
+ reshape_y_tensor = reshape_layer->getOutput (0 );
114
+ } else {
115
+ // In fact , we can remove this `else`, but -> rt_resnet50_test CI in trt
116
+ // 6015 faling, how ridiculous!
117
+ reshape_y_tensor = Y;
179
118
}
180
- regist_eltwise_weight (scale_mode);
181
- }
182
-
183
- protected:
184
- std::string op_type_;
185
- };
186
119
187
- class ElementwiseTensorOpConverter : public OpConverter {
188
- public:
189
- ElementwiseTensorOpConverter () {}
190
- void operator ()(const framework::proto::OpDesc& op,
191
- const framework::Scope& scope, bool test_mode) override {
192
120
auto op_pair = ops.find (op_type_);
193
- PADDLE_ENFORCE_NE (op_pair, ops.end (),
121
+ PADDLE_ENFORCE_NE (op_pair,
122
+ ops.end (),
194
123
platform::errors::InvalidArgument (
195
124
" Elementwise op's type(%s) is not supported. Please "
196
125
" check if the op_type is correct." ,
197
126
op_type_));
198
127
199
- // Here the two nullptr looks strange, that's because the
200
- // framework::OpDesc's constructor is strange.
201
- framework::OpDesc op_desc (op, nullptr );
202
- nvinfer1::ILayer* layer = nullptr ;
203
-
204
- auto * X = engine_->GetITensor (op_desc.Input (" X" ).front ());
205
- auto * Y = engine_->GetITensor (op_desc.Input (" Y" ).front ());
206
- std::vector<nvinfer1::ITensor*> itensors;
207
- itensors.push_back (X);
208
- itensors.push_back (Y);
209
- nvinfer1::Dims dims_x = X->getDimensions ();
210
- nvinfer1::Dims dims_y = Y->getDimensions ();
211
-
212
- int axis = BOOST_GET_CONST (int , op_desc.GetAttr (" axis" ));
213
- auto output_name = op_desc.Output (" Out" )[0 ];
214
-
215
- auto common_func = [&](nvinfer1::ILayer* layer) {
216
- RreplenishLayerAndOutput (layer, " elementwise" , {output_name}, test_mode);
217
- };
218
-
219
- if (dims_x.nbDims == dims_y.nbDims ) {
220
- // The two input tensor should have the same dims
221
- VLOG (3 ) << " Convert a fluid elementwise op to TensorRT IElementWiseLayer" ;
222
- nvinfer1::IElementWiseLayer* elet_layer =
223
- TRT_ENGINE_ADD_LAYER (engine_, ElementWise, *X, *Y, op_pair->second );
224
-
225
- layer = elet_layer;
226
- } else {
227
- VLOG (3 ) << " Convert a fluid elementwise op to TensorRT "
228
- " ElementWisePluginLayer" ;
229
- if (engine_->with_dynamic_shape ()) {
230
- #if IS_TRT_VERSION_GE(6000)
231
- plugin::ElementwisePluginDynamic* plugin =
232
- new plugin::ElementwisePluginDynamic (op_type_, axis);
233
- layer = engine_->AddDynamicPlugin (itensors.data (), 2 , plugin);
234
- #else
235
- PADDLE_THROW (platform::errors::Fatal (
236
- " You are running the TRT Dynamic Shape mode, need to confirm that "
237
- " your TRT version is no less than 6.0" ));
238
- #endif
239
- } else {
240
- plugin::ElementWisePlugin* plugin =
241
- new plugin::ElementWisePlugin (op_type_, dims_x, dims_y, axis);
242
-
243
- std::vector<nvinfer1::ITensor*> inputs{X, Y};
244
- auto * plugin_layer = engine_->AddPlugin (
245
- inputs.data (), inputs.size (),
246
- reinterpret_cast <plugin::PluginTensorRT*>(plugin));
247
-
248
- layer = plugin_layer;
249
- }
250
- }
251
- common_func (layer);
128
+ auto * layer = TRT_ENGINE_ADD_LAYER (
129
+ engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second );
130
+ RreplenishLayerAndOutput (layer, " elementwise" , {output_name}, test_mode);
252
131
}
253
132
254
133
protected:
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
268
147
{" max" , nvinfer1::ElementWiseOperation::kMAX },
269
148
};
270
149
271
- class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
272
- public:
273
- ElementwiseWeightAddOpConverter () { op_type_ = " add" ; }
274
- };
275
-
276
- class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
277
- public:
278
- ElementwiseWeightMulOpConverter () { op_type_ = " mul" ; }
279
- };
280
-
281
150
class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
282
151
public:
283
152
ElementwiseTensorAddOpConverter () { op_type_ = " add" ; }
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
318
187
} // namespace paddle
319
188
320
189
REGISTER_TRT_OP_CONVERTER (elementwise_add_weight,
321
- ElementwiseWeightAddOpConverter );
190
+ ElementwiseTensorAddOpConverter );
322
191
REGISTER_TRT_OP_CONVERTER (elementwise_mul_weight,
323
- ElementwiseWeightMulOpConverter);
192
+ ElementwiseTensorMulOpConverter);
193
+ REGISTER_TRT_OP_CONVERTER (elementwise_sub_weight,
194
+ ElementwiseTensorSubOpConverter);
195
+ REGISTER_TRT_OP_CONVERTER (elementwise_div_weight,
196
+ ElementwiseTensorDivOpConverter);
197
+ REGISTER_TRT_OP_CONVERTER (elementwise_pow_weight,
198
+ ElementwiseTensorPowOpConverter);
324
199
325
200
REGISTER_TRT_OP_CONVERTER (elementwise_add_tensor,
326
201
ElementwiseTensorAddOpConverter);
0 commit comments