@@ -34,51 +34,97 @@ namespace tensorrt {
 class FcOpConverter : public OpConverter {
  public:
   nvinfer1::ILayer* reshape_before_fc(nvinfer1::ITensor* before_fc,
-                                      nvinfer1::Dims x_dim, int x_num_col_dims,
+                                      nvinfer1::Dims x_dim,
+                                      int x_num_col_dims,
                                       std::string output_name) {
     // add shuffle before fc
     nvinfer1::Dims reshape_before_fc_dim;
     reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
     // padding shape "* x q x 1 x 1"
-    for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
-      reshape_before_fc_dim.d[i] = 1;
-    }
-    for (int i = 0; i < x_dim.nbDims; i++) {
-      if (i < x_num_col_dims) {
-        reshape_before_fc_dim.d[i] = 0;
-      } else {
-        if (x_dim.d[i] < 0) {
-          reshape_before_fc_dim.d[x_num_col_dims] = -1;
-          break;
+
+    nvinfer1::ITensor* filal_reshape_before_fc_shape_tensor = nullptr;
+
+    if (!engine_->with_dynamic_shape()) {
+      for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+        reshape_before_fc_dim.d[i] = 1;
+      }
+      for (int i = 0; i < x_dim.nbDims; i++) {
+        if (i < x_num_col_dims) {
+          reshape_before_fc_dim.d[i] = 0;
+        } else {
+          reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
+        }
+      }
+    } else {
+      std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
+      nvinfer1::ITensor* input_shape_tensor = Shape(before_fc);
+
+      for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+        reshape_before_fc_shape_tensor.push_back(Add1DConstantLayer(1));
+      }
+      for (int i = 0; i < x_dim.nbDims; i++) {
+        if (i < x_num_col_dims) {
+          reshape_before_fc_shape_tensor[i] =
+              GetEleTensorOfShape(input_shape_tensor, i);
+        } else {
+          reshape_before_fc_shape_tensor[x_num_col_dims] =
+              Prod(GetEleTensorOfShape(input_shape_tensor, i),
+                   reshape_before_fc_shape_tensor[x_num_col_dims]);
+          // If not set, test_trt_matmul_quant_dequant in trt 6015 will fail
+          reshape_before_fc_shape_tensor[x_num_col_dims]->setType(
+              nvinfer1::DataType::kINT32);
         }
-        reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
       }
+      filal_reshape_before_fc_shape_tensor =
+          Concat(reshape_before_fc_shape_tensor);
     }
+
     auto* reshape_before_fc_layer =
         TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *before_fc);
-    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    if (!engine_->with_dynamic_shape()) {
+      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    } else {
+      reshape_before_fc_layer->setInput(1,
+                                        *filal_reshape_before_fc_shape_tensor);
+    }
     reshape_before_fc_layer->setName(
         ("fc_op_reshape_before_fc: Shuffle (Output: " + output_name + ")")
            .c_str());
     return reshape_before_fc_layer;
   }
 
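For readers following the dynamic-shape branch above: the target shape can no longer be a fixed nvinfer1::Dims, so it is assembled as a 1-D Int32 shape tensor and wired into the Shuffle layer as its second input. Below is a minimal sketch of the same idea written against the plain TensorRT C++ API; Shape, GetEleTensorOfShape, Prod, Concat, Add1DConstantLayer and TRT_ENGINE_ADD_LAYER in the diff are Paddle-side helpers, and every name in the sketch (ReshapeBeforeFcDynamic and its locals) is illustrative rather than code from this PR.

// Sketch only: build the "* x q x 1 x 1" target shape as a shape tensor and
// feed it to the Shuffle layer, mirroring the dynamic-shape branch above.
#include <NvInfer.h>
#include <cstdint>
#include <vector>

nvinfer1::IShuffleLayer* ReshapeBeforeFcDynamic(
    nvinfer1::INetworkDefinition* net, nvinfer1::ITensor* x,
    int x_num_col_dims) {
  // shape(x) as a 1-D Int32 tensor.
  nvinfer1::ITensor* x_shape = net->addShape(*x)->getOutput(0);
  const int nb_dims = x->getDimensions().nbDims;

  // shape(x)[i] as a length-1 Int32 tensor, extracted with a static slice.
  auto element = [&](int i) {
    nvinfer1::Dims start{1, {i}}, size{1, {1}}, stride{1, {1}};
    return net->addSlice(*x_shape, start, size, stride)->getOutput(0);
  };

  // The leading x_num_col_dims dims are kept as-is; the remaining dims are
  // folded into one dimension by multiplying their sizes together.
  std::vector<nvinfer1::ITensor*> pieces;
  for (int i = 0; i < x_num_col_dims; ++i) pieces.push_back(element(i));
  nvinfer1::ITensor* folded = element(x_num_col_dims);
  for (int i = x_num_col_dims + 1; i < nb_dims; ++i) {
    folded = net->addElementWise(*folded, *element(i),
                                 nvinfer1::ElementWiseOperation::kPROD)
                 ->getOutput(0);
  }
  pieces.push_back(folded);

  // Two trailing 1s give the conv1x1 / FullyConnected path the layout it
  // expects. Static storage keeps the Weights buffer alive during the build.
  static const int32_t kOnes[2] = {1, 1};
  nvinfer1::Weights ones{nvinfer1::DataType::kINT32, kOnes, 2};
  pieces.push_back(
      net->addConstant(nvinfer1::Dims{1, {2}}, ones)->getOutput(0));

  auto* concat = net->addConcatenation(
      pieces.data(), static_cast<int32_t>(pieces.size()));
  concat->setAxis(0);

  // With dynamic shapes the target shape is wired in as the Shuffle layer's
  // second input instead of via setReshapeDimensions().
  auto* shuffle = net->addShuffle(*x);
  shuffle->setInput(1, *concat->getOutput(0));
  return shuffle;
}

In static-shape mode the 0 placeholders in reshape_before_fc_dim already copy the leading dimensions, so the fixed-Dims path stays unchanged; only the runtime-shape case needs the shape-tensor detour.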
   nvinfer1::ILayer* reshape_after_fc(nvinfer1::ITensor* after_fc,
-                                     nvinfer1::Dims x_dim, int x_num_col_dims) {
+                                     nvinfer1::Dims x_dim,
+                                     int x_num_col_dims) {
     // add shuffle after fc
     nvinfer1::Dims reshape_after_fc_dim;
     reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
-    for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
-      reshape_after_fc_dim.d[i] = 0;
+
+    nvinfer1::ITensor* filal_reshape_after_fc_shape_tensor = nullptr;
+    if (!engine_->with_dynamic_shape()) {
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
+    } else {
+      std::vector<int> gather_indices(x_num_col_dims + 1);
+      std::iota(gather_indices.begin(), gather_indices.end(), 0);
+      filal_reshape_after_fc_shape_tensor =
+          Gather(Shape(after_fc), gather_indices);
     }
+
     auto* reshape_after_fc_layer =
         TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *after_fc);
-    reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+    if (!engine_->with_dynamic_shape()) {
+      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+    } else {
+      reshape_after_fc_layer->setInput(1, *filal_reshape_after_fc_shape_tensor);
+    }
     return reshape_after_fc_layer;
   }
 
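The reshape after the FC undoes the padding in dynamic-shape mode by gathering the first x_num_col_dims + 1 entries of shape(after_fc) (the Gather over std::iota indices above). A companion sketch in plain TensorRT terms, again with illustrative names and a static slice standing in for the gather:

#include <NvInfer.h>

// Sketch only: drop the two trailing 1x1 padding dims added before the FC by
// reshaping to the first x_num_col_dims + 1 entries of the output's shape.
nvinfer1::IShuffleLayer* ReshapeAfterFcDynamic(
    nvinfer1::INetworkDefinition* net, nvinfer1::ITensor* after_fc,
    int x_num_col_dims) {
  nvinfer1::ITensor* out_shape = net->addShape(*after_fc)->getOutput(0);
  nvinfer1::Dims start{1, {0}}, size{1, {x_num_col_dims + 1}}, stride{1, {1}};
  nvinfer1::ITensor* target =
      net->addSlice(*out_shape, start, size, stride)->getOutput(0);
  auto* shuffle = net->addShuffle(*after_fc);
  shuffle->setInput(1, *target);  // dynamic reshape via the second input
  return shuffle;
}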
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
+                  const framework::Scope& scope,
+                  bool test_mode) override {
     VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     auto output_name = op_desc.Output("Out").front();
@@ -96,8 +142,9 @@ class FcOpConverter : public OpConverter {
     // Declare weights
     auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
     PADDLE_ENFORCE_NOT_NULL(
-        Y_v, platform::errors::NotFound(
-                 "Can not find %s presistale var of fc in scope.", w_name));
+        Y_v,
+        platform::errors::NotFound(
+            "Can not find %s presistale var of fc in scope.", w_name));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     int x_num_col_dims =
         op_desc.HasAttr("x_num_col_dims")
@@ -128,7 +175,8 @@ class FcOpConverter : public OpConverter {
     }
     weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);
 
-    PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL,
+    PADDLE_ENFORCE_EQ(Y_t->dims().size(),
+                      2UL,
                       platform::errors::InvalidArgument(
                           "The fc's weight should be a matrix with 2 dims, but "
                           "it's %d-dimensional.",
@@ -143,25 +191,31 @@ class FcOpConverter : public OpConverter {
       }
     };
 
-    auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
+    auto regist_fc = [&](nvinfer1::ITensor* inputs,
+                         int n_output,
                          TensorRTEngine::Weight& weight,
                          TensorRTEngine::Weight& bias) {
       if (enable_int8 || support_int8) {
         // add conv layer
         float out_scale = 0;
         if (enable_int8) {
           PADDLE_ENFORCE_EQ(
-              op_desc.HasAttr("out_threshold"), true,
+              op_desc.HasAttr("out_threshold"),
+              true,
               platform::errors::InvalidArgument(
                   "must have out threshold in fc layers in int8 mode"));
           out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
         } else {
           out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Out"));
         }
         nvinfer1::DimsHW nv_ksize(1, 1);
-        auto* fc_layer_int8 =
-            TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
-                                 nv_ksize, weight.get(), bias.get());
+        auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
+                                                   Convolution,
+                                                   *inputs,
+                                                   n_output,
+                                                   nv_ksize,
+                                                   weight.get(),
+                                                   bias.get());
         fc_layer_int8->setName(
             ("fc_op_int8_conv1x1: Convolution (Output: " + output_name + ")")
                 .c_str());
@@ -174,21 +228,29 @@ class FcOpConverter : public OpConverter {
                   .c_str());
           engine_->SetTensorDynamicRange(fc_after_reshape_int8->getOutput(0),
                                          out_scale);
-          nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_after_reshape_int8->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_int8 =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_after_reshape_int8->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_int8,
+                                   "relu_after_fc_shuffle",
+                                   {output_name},
+                                   test_mode);
         } else {
           RreplenishLayerAndOutput(fc_after_reshape_int8,
                                    "fc_op_int8_reshape_after_fc: Shuffle",
-                                   {output_name}, test_mode);
+                                   {output_name},
+                                   test_mode);
         }
       } else {
         // add fc layer
-        auto* fc_layer_float =
-            TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
-                                 weight.get(), bias.get());
+        auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(engine_,
+                                                    FullyConnected,
+                                                    *inputs,
+                                                    n_output,
+                                                    weight.get(),
+                                                    bias.get());
         fc_layer_float->setName(
             ("fc_op_float: FullyConnected (Output: " + output_name + ")")
                 .c_str());
@@ -198,14 +260,20 @@ class FcOpConverter : public OpConverter {
           fc_after_reshape_float->setName(
               ("float_reshape_after_fc: Shuffle (Output: " + output_name + ")")
                   .c_str());
-          nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_after_reshape_float->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_float =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_after_reshape_float->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_float,
+                                   "relu_after_fc_shuffle",
+                                   {output_name},
+                                   test_mode);
         } else {
-          RreplenishLayerAndOutput(fc_after_reshape_float, "shuffle_after_fc",
-                                   {output_name}, test_mode);
+          RreplenishLayerAndOutput(fc_after_reshape_float,
+                                   "shuffle_after_fc",
+                                   {output_name},
+                                   test_mode);
         }
       }
     };
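As the lambda above shows, the int8 path emits the FC as a 1x1 Convolution over the padded "* x q x 1 x 1" input and takes the output quantization range from the op's out_threshold attribute, while the float path keeps a FullyConnected layer. A rough sketch of the int8 idea in raw TensorRT calls follows; the converter's SetTensorDynamicRange helper is approximated here with ITensor::setDynamicRange and a symmetric range, the Nd convolution variant is used for brevity, and the function name is made up:

#include <NvInfer.h>

// Sketch only: FC as a 1x1 convolution over the "* x q x 1 x 1" input, with a
// symmetric per-tensor dynamic range taken from the op's out_threshold so
// TensorRT can pick int8 kernels for the output (assumption, not PR code).
nvinfer1::ILayer* AddFcAsConv1x1Int8(nvinfer1::INetworkDefinition* net,
                                     nvinfer1::ITensor* reshaped_x,
                                     int n_output,
                                     nvinfer1::Weights weight,
                                     nvinfer1::Weights bias,
                                     float out_scale) {
  auto* conv = net->addConvolutionNd(
      *reshaped_x, n_output, nvinfer1::Dims{2, {1, 1}}, weight, bias);
  conv->getOutput(0)->setDynamicRange(-out_scale, out_scale);
  return conv;
}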
@@ -255,15 +323,20 @@ class FcOpConverter : public OpConverter {
       if (enable_int8 || support_int8) {
         // add conv1x1 layer
         nvinfer1::DimsHW nv_ksize(1, 1);
-        auto* fc_layer_int8 =
-            TRT_ENGINE_ADD_LAYER(engine_, Convolution, *X, n_output, nv_ksize,
-                                 weight.get(), bias.get());
+        auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
+                                                   Convolution,
+                                                   *X,
+                                                   n_output,
+                                                   nv_ksize,
+                                                   weight.get(),
+                                                   bias.get());
         if (activation_type == "relu") {
           fc_layer_int8->setName(
               ("ernie_fc_op_int8: Convolution (Output: " + output_name + ")")
                   .c_str());
           PADDLE_ENFORCE_EQ(
-              op_desc.HasAttr("out_threshold"), true,
+              op_desc.HasAttr("out_threshold"),
+              true,
               platform::errors::InvalidArgument(
                   "must have out threshold in fc layers in int8 mode"));
           float out_scale = 0;
@@ -275,15 +348,20 @@ class FcOpConverter : public OpConverter {
           }
           engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0),
                                          out_scale);
-          nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_layer_int8->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_int8, "relu_after_ernie_fc_int8",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_int8 =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_layer_int8->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_int8,
+                                   "relu_after_ernie_fc_int8",
+                                   {output_name},
+                                   test_mode);
         } else {
           RreplenishLayerAndOutput(fc_layer_int8,
                                    "ernie_fc_op_int8: Convolution",
-                                   {output_name}, test_mode);
+                                   {output_name},
+                                   test_mode);
         }
       } else {
         // add fc layer
@@ -292,25 +370,30 @@ class FcOpConverter : public OpConverter {
         if (activation_type == "relu") {
           fc_layer_float->setName(
               ("ernie_fc_op_float: (Output: " + output_name + ")").c_str());
-          nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_layer_float->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
+          nvinfer1::IActivationLayer* relu_layer_float =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_layer_float->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
           RreplenishLayerAndOutput(relu_layer_float,
-                                   "relu_after_ernie_fc_float", {output_name},
+                                   "relu_after_ernie_fc_float",
+                                   {output_name},
                                    test_mode);
         } else {
-          RreplenishLayerAndOutput(fc_layer_float, "ernie_fc_op_float ",
-                                   {output_name}, test_mode);
+          RreplenishLayerAndOutput(
+              fc_layer_float, "ernie_fc_op_float ", {output_name}, test_mode);
         }
       }
     } else {  // need reshape input before and after fc
       PADDLE_ENFORCE_GT(
-          x_dim.nbDims, x_num_col_dims,
+          x_dim.nbDims,
+          x_num_col_dims,
           platform::errors::InvalidArgument(
               "Params and input dims mismatch. Paddle-TRT FC "
               "converter expects x_dim.nbDims > x_num_col_dims, but "
               "x_dim.nbDims : %d, x_num_col_dims : %d.",
-              x_dim.nbDims, x_num_col_dims));
+              x_dim.nbDims,
+              x_num_col_dims));
       auto* reshape_before_fc_layer =
           reshape_before_fc(X, x_dim, x_num_col_dims, output_name);
       auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);