@@ -37,7 +37,7 @@ class FcOpConverter : public OpConverter {
37
37
const framework::Scope& scope, bool test_mode) override {
38
38
VLOG (3 ) << " convert a fluid fc op to tensorrt fc layer without bias" ;
39
39
framework::OpDesc op_desc (op, nullptr );
40
-
40
+ auto output_name = op_desc. Output ( " Out " ). front ();
41
41
auto input_names = op_desc.InputNames ();
42
42
bool with_bias = input_names.size () >= 3 ;
43
43
std::string w_name = " Y" ;
@@ -48,13 +48,14 @@ class FcOpConverter : public OpConverter {
48
48
}
49
49
// Declare inputs
50
50
auto * X = engine_->GetITensor (op_desc.Input (i_name).front ());
51
+ auto x_dim = X->getDimensions ();
51
52
// Declare weights
52
53
auto * Y_v = scope.FindVar (op_desc.Input (w_name).front ());
53
54
PADDLE_ENFORCE_NOT_NULL (
54
55
Y_v, platform::errors::NotFound (
55
56
" Can not find %s presistale var of fc in scope." , w_name));
56
57
auto * Y_t = Y_v->GetMutable <framework::LoDTensor>();
57
- const int x_num_col_dims =
58
+ int x_num_col_dims =
58
59
op_desc.HasAttr (" x_num_col_dims" )
59
60
? BOOST_GET_CONST (int , op_desc.GetAttr (" x_num_col_dims" ))
60
61
: (op_desc.HasAttr (" in_num_col_dims" )
@@ -106,31 +107,61 @@ class FcOpConverter : public OpConverter {
106
107
auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
107
108
TensorRTEngine::Weight& weight,
108
109
TensorRTEngine::Weight& bias) {
109
- nvinfer1::ILayer* fc_layer = nullptr ;
110
110
if (enable_int8) {
111
+ // add conv layer
111
112
PADDLE_ENFORCE_EQ (
112
113
op_desc.HasAttr (" out_threshold" ), true ,
113
114
platform::errors::InvalidArgument (
114
115
" must have out threshold in fc layers in int8 mode" ));
115
116
float out_scale =
116
117
BOOST_GET_CONST (float , op_desc.GetAttr (" out_threshold" ));
117
118
nvinfer1::DimsHW nv_ksize (1 , 1 );
118
- fc_layer = TRT_ENGINE_ADD_LAYER (engine_, Convolution, *inputs, n_output,
119
- nv_ksize, weight. get (), bias. get ());
120
- engine_-> SetTensorDynamicRange (fc_layer-> getOutput ( 0 ), out_scale );
121
- } else {
122
- fc_layer = TRT_ENGINE_ADD_LAYER (engine_, FullyConnected, *inputs,
123
- n_output, weight. get (), bias. get ());
124
- }
125
-
126
- auto output_name = op_desc. Output ( " Out " ). front ();
127
- if (activation_type == " relu " ) {
128
- nvinfer1::IActivationLayer* relu_layer =
129
- TRT_ENGINE_ADD_LAYER (engine_, Activation, *(fc_layer-> getOutput ( 0 )) ,
130
- nvinfer1::ActivationType:: kRELU );
131
- RreplenishLayerAndOutput (relu_layer, " fc " , {output_name}, test_mode);
119
+ auto * fc_layer_int8 =
120
+ TRT_ENGINE_ADD_LAYER (engine_, Convolution, *inputs, n_output,
121
+ nv_ksize, weight. get ( ), bias. get () );
122
+ engine_-> SetTensorDynamicRange (fc_layer_int8-> getOutput ( 0 ), out_scale);
123
+ if (activation_type == " relu " ) {
124
+ nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER (
125
+ engine_, Activation, *(fc_layer_int8-> getOutput ( 0 )),
126
+ nvinfer1::ActivationType:: kRELU );
127
+ RreplenishLayerAndOutput (relu_layer_int8, " relu_after_fc_shuffle " ,
128
+ {output_name}, test_mode);
129
+ } else {
130
+ RreplenishLayerAndOutput (fc_layer_int8, " shuffle_after_fc " ,
131
+ {output_name}, test_mode );
132
+ }
132
133
} else {
133
- RreplenishLayerAndOutput (fc_layer, " fc" , {output_name}, test_mode);
134
+ // add fc layer
135
+ auto * fc_layer_before =
136
+ TRT_ENGINE_ADD_LAYER (engine_, FullyConnected, *inputs, n_output,
137
+ weight.get (), bias.get ());
138
+ fc_layer_before->setName (
139
+ (" fc_layer_before(Output: " + output_name + " )" ).c_str ());
140
+ // add shuffle after fc
141
+ nvinfer1::Dims reshape_after_fc_dim;
142
+ if (engine_->use_oss () && engine_->with_ernie () && x_dim.nbDims == 4 &&
143
+ x_dim.d [2 ] == 1 && x_dim.d [3 ] == 1 && x_num_col_dims == 1 ) {
144
+ // If use tensorrt'oss, the x_dim and x_num_col_dims need change
145
+ reshape_after_fc_dim.nbDims = 4 ;
146
+ } else {
147
+ reshape_after_fc_dim.nbDims = x_num_col_dims + 1 ;
148
+ }
149
+ for (int i = 0 ; i < reshape_after_fc_dim.nbDims ; i++) {
150
+ reshape_after_fc_dim.d [i] = 0 ;
151
+ }
152
+ auto * fc_layer_float = TRT_ENGINE_ADD_LAYER (
153
+ engine_, Shuffle, *fc_layer_before->getOutput (0 ));
154
+ fc_layer_float->setReshapeDimensions (reshape_after_fc_dim);
155
+ if (activation_type == " relu" ) {
156
+ nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER (
157
+ engine_, Activation, *(fc_layer_float->getOutput (0 )),
158
+ nvinfer1::ActivationType::kRELU );
159
+ RreplenishLayerAndOutput (relu_layer_float, " relu_after_fc_shuffle" ,
160
+ {output_name}, test_mode);
161
+ } else {
162
+ RreplenishLayerAndOutput (fc_layer_float, " shuffle_after_fc" ,
163
+ {output_name}, test_mode);
164
+ }
134
165
}
135
166
};
136
167
@@ -157,153 +188,47 @@ class FcOpConverter : public OpConverter {
157
188
static_cast <void *>(bias_data),
158
189
static_cast <size_t >(bias_num)};
159
190
160
- if (engine_->with_dynamic_shape ()) {
161
- // not NCHW layout, but NLP layout with added 'x 1 x 1'
162
- auto x_dim = X->getDimensions ();
163
- if (engine_->use_oss () && engine_->with_ernie () && x_dim.nbDims == 4 &&
164
- x_dim.d [2 ] == 1 && x_dim.d [3 ] == 1 && x_num_col_dims == 2 ) {
165
- // fc which is just after self attention
166
- regist_fc (X, n_output, weight, bias);
167
- return ;
168
- }
169
- PADDLE_ENFORCE_LE (
170
- x_dim.nbDims - x_num_col_dims, 3 ,
171
- platform::errors::InvalidArgument (
172
- " Params and input dims mismatch. Paddle-TRT FC "
173
- " converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
174
- " x_dim.nbDims = %d, x_num_col_dims = %d." ,
175
- x_dim.nbDims , x_num_col_dims));
176
- auto output_name = op_desc.Output (" Out" ).front ();
177
- // add shuffle before fc
178
- nvinfer1::Dims reshape_before_fc_dim;
179
- // padding shape "x 1 x 1"
180
- int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
181
- reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
182
- int cur_dim_index = reshape_before_fc_dim.nbDims - 1 ;
183
- while (padding_length-- > 0 ) {
184
- reshape_before_fc_dim.d [cur_dim_index--] = 1 ;
185
- }
186
- while (cur_dim_index >= 0 ) {
187
- reshape_before_fc_dim.d [cur_dim_index--] = 0 ;
188
- }
189
-
190
- auto * reshape_before_fc_layer =
191
- TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *X);
192
- reshape_before_fc_layer->setReshapeDimensions (reshape_before_fc_dim);
193
- reshape_before_fc_layer->setName (
194
- (" shuffle_before_fc(Output: " + output_name + " )" ).c_str ());
195
-
196
- // add fc layer
197
- auto * fc_layer = TRT_ENGINE_ADD_LAYER (
198
- engine_, FullyConnected, *reshape_before_fc_layer->getOutput (0 ),
199
- n_output, weight.get (), bias.get ());
200
- fc_layer->setName ((" fc_layer(Output: " + output_name + " )" ).c_str ());
201
-
202
- // add shuffle after fc
203
- nvinfer1::Dims reshape_after_fc_dim;
204
- reshape_after_fc_dim.nbDims = x_num_col_dims + 1 ;
205
- for (int i = 0 ; i < reshape_after_fc_dim.nbDims ; i++) {
206
- reshape_after_fc_dim.d [i] = 0 ;
207
- }
208
-
209
- auto * reshape_after_fc_layer =
210
- TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *fc_layer->getOutput (0 ));
211
- reshape_after_fc_layer->setReshapeDimensions (reshape_after_fc_dim);
212
-
213
- if (activation_type == " relu" ) {
214
- reshape_after_fc_layer->setName (
215
- (" shuffle_after_fc(Output: " + output_name + " )" ).c_str ());
216
- nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER (
217
- engine_, Activation, *(reshape_after_fc_layer->getOutput (0 )),
218
- nvinfer1::ActivationType::kRELU );
219
- RreplenishLayerAndOutput (relu_layer, " relu_after_fc_shuffle" ,
220
- {output_name}, test_mode);
221
- } else {
222
- RreplenishLayerAndOutput (reshape_after_fc_layer, " shuffle_after_fc" ,
223
- {output_name}, test_mode);
224
- }
225
- return ;
191
+ // Running the TRT Static Shape mode: x_num_col_dims-1
192
+ if (!engine_->with_dynamic_shape ()) {
193
+ x_num_col_dims--;
226
194
}
227
- // in order to handle situations in NLP models(input dims < 3,
228
- // x_num_col_dims != 1, etc.), reshape input to perform FC correctly.
229
- auto * reshape_itensor = X;
230
- int input_dims = X->getDimensions ().nbDims ;
231
- auto input_d = X->getDimensions ().d ;
232
- int reshape_dim3[3 ] = {0 };
233
- int reshape_dim4[4 ] = {0 };
234
- PADDLE_ENFORCE_LE (x_num_col_dims, input_dims,
235
- platform::errors::InvalidArgument (
236
- " Params and input dims mismatch. Paddle-TRT FC "
237
- " converter expects x_num_col_dims <= input dims" ));
238
- if (x_num_col_dims == 1 ) {
239
- if (input_dims == 4 ) {
240
- PADDLE_ENFORCE_EQ (
241
- input_d[3 ], 1 ,
242
- platform::errors::InvalidArgument (
243
- " Invalid dimensions. When x_num_col_dims equals to 1 and input "
244
- " dims equals to 4, the last dim of input must be 1, but got %d" ,
245
- input_d[3 ]));
246
- }
247
- if (enable_int8) {
248
- reshape_dim3[0 ] = 1 ;
249
- for (int i = 0 ; i < 3 ; i++) {
250
- reshape_dim3[0 ] *= input_d[i];
251
- if (i > 0 ) {
252
- reshape_dim3[i] = 1 ;
253
- }
254
- }
255
- } else {
256
- for (int i = 0 ; i < 3 ; i++) {
257
- if (i < input_dims) {
258
- reshape_dim3[i] = input_d[i];
259
- } else {
260
- reshape_dim3[i] = 1 ;
261
- }
262
- }
263
- }
264
-
265
- nvinfer1::Dims3 reshape_dim (reshape_dim3[0 ], reshape_dim3[1 ],
266
- reshape_dim3[2 ]);
267
- auto * reshape_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *X);
268
- reshape_layer->setReshapeDimensions (reshape_dim);
269
- reshape_itensor = reshape_layer->getOutput (0 );
270
- if (enable_int8) {
271
- engine_->SetTensorDynamicRange (reshape_itensor, in_scale);
272
- }
273
- } else {
274
- PADDLE_ENFORCE_NE (input_dims, 1 ,
275
- platform::errors::InvalidArgument (
276
- " Invalid dimensions. When x_num_col_dims equals to "
277
- " 2, input_dims should not be 1" ));
278
-
279
- if (enable_int8) {
280
- for (int i = 0 ; i < 4 ; i++) {
281
- if (i == 0 ) {
282
- reshape_dim4[i] = input_d[i];
283
- } else {
284
- reshape_dim4[i] = 1 ;
285
- if (i < input_dims) {
286
- reshape_dim4[1 ] *= input_d[i];
287
- }
288
- }
289
- }
195
+ // If use tensorrt'oss, the x_dim and x_num_col_dims need change
196
+ if (engine_->use_oss () && engine_->with_ernie () && x_dim.nbDims == 4 &&
197
+ x_dim.d [2 ] == 1 && x_dim.d [3 ] == 1 && x_num_col_dims == 2 ) {
198
+ x_num_col_dims = 1 ;
199
+ }
200
+ PADDLE_ENFORCE_GT (
201
+ x_dim.nbDims , x_num_col_dims,
202
+ platform::errors::InvalidArgument (
203
+ " Params and input dims mismatch. Paddle-TRT FC "
204
+ " converter expects x_dim.nbDims > x_num_col_dims, but "
205
+ " x_dim.nbDims : %d, x_num_col_dims : %d." ,
206
+ x_dim.nbDims , x_num_col_dims));
207
+ // add shuffle before fc
208
+ nvinfer1::Dims reshape_before_fc_dim;
209
+ reshape_before_fc_dim.nbDims = x_num_col_dims + 3 ;
210
+ // padding shape "* x q x 1 x 1"
211
+ for (int i = 0 ; i < reshape_before_fc_dim.nbDims ; i++) {
212
+ reshape_before_fc_dim.d [i] = 1 ;
213
+ }
214
+ for (int i = 0 ; i < x_dim.nbDims ; i++) {
215
+ if (i < x_num_col_dims) {
216
+ reshape_before_fc_dim.d [i] = 0 ;
290
217
} else {
291
- for (int i = 0 ; i < 4 ; i++) {
292
- if (i < input_dims) {
293
- reshape_dim4[i] = input_d[i];
294
- } else {
295
- reshape_dim4[i] = 1 ;
296
- }
218
+ if (x_dim.d [i] < 0 ) {
219
+ reshape_before_fc_dim.d [x_num_col_dims] = -1 ;
220
+ break ;
297
221
}
222
+ reshape_before_fc_dim.d [x_num_col_dims] *= x_dim.d [i];
298
223
}
299
- nvinfer1::Dims4 reshape_dim (reshape_dim4[ 0 ], reshape_dim4[ 1 ],
300
- reshape_dim4[ 2 ], reshape_dim4[ 3 ] );
301
- auto * reshape_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *X );
302
- reshape_layer-> setReshapeDimensions (reshape_dim);
303
- reshape_itensor = reshape_layer-> getOutput ( 0 );
304
- if (enable_int8) {
305
- engine_-> SetTensorDynamicRange (reshape_itensor, in_scale);
306
- }
224
+ }
225
+ auto * reshape_before_fc_layer = TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *X );
226
+ reshape_before_fc_layer-> setReshapeDimensions (reshape_before_fc_dim );
227
+ reshape_before_fc_layer-> setName (
228
+ ( " shuffle_before_fc(Output: " + output_name + " ) " ). c_str () );
229
+ auto * reshape_itensor = reshape_before_fc_layer-> getOutput ( 0 );
230
+ if (enable_int8) {
231
+ engine_-> SetTensorDynamicRange (reshape_itensor, in_scale);
307
232
}
308
233
regist_fc (reshape_itensor, n_output, weight, bias);
309
234
}
0 commit comments