
Commit 245005d
[Paddle-TRT][cherry pick] Slice to 2.3 (#44757)

* slice_to_2.3

1 parent: 7cdce09

6 files changed: +494, -252 lines

paddle/fluid/inference/tensorrt/convert/fc_op.cc
143 additions, 60 deletions
@@ -34,51 +34,97 @@ namespace tensorrt {
 class FcOpConverter : public OpConverter {
  public:
   nvinfer1::ILayer* reshape_before_fc(nvinfer1::ITensor* before_fc,
-                                      nvinfer1::Dims x_dim, int x_num_col_dims,
+                                      nvinfer1::Dims x_dim,
+                                      int x_num_col_dims,
                                       std::string output_name) {
     // add shuffle before fc
     nvinfer1::Dims reshape_before_fc_dim;
     reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
     // padding shape "* x q x 1 x 1"
-    for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
-      reshape_before_fc_dim.d[i] = 1;
-    }
-    for (int i = 0; i < x_dim.nbDims; i++) {
-      if (i < x_num_col_dims) {
-        reshape_before_fc_dim.d[i] = 0;
-      } else {
-        if (x_dim.d[i] < 0) {
-          reshape_before_fc_dim.d[x_num_col_dims] = -1;
-          break;
+
+    nvinfer1::ITensor* filal_reshape_before_fc_shape_tensor = nullptr;
+
+    if (!engine_->with_dynamic_shape()) {
+      for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+        reshape_before_fc_dim.d[i] = 1;
+      }
+      for (int i = 0; i < x_dim.nbDims; i++) {
+        if (i < x_num_col_dims) {
+          reshape_before_fc_dim.d[i] = 0;
+        } else {
+          reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
+        }
+      }
+    } else {
+      std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
+      nvinfer1::ITensor* input_shape_tensor = Shape(before_fc);
+
+      for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+        reshape_before_fc_shape_tensor.push_back(Add1DConstantLayer(1));
+      }
+      for (int i = 0; i < x_dim.nbDims; i++) {
+        if (i < x_num_col_dims) {
+          reshape_before_fc_shape_tensor[i] =
+              GetEleTensorOfShape(input_shape_tensor, i);
+        } else {
+          reshape_before_fc_shape_tensor[x_num_col_dims] =
+              Prod(GetEleTensorOfShape(input_shape_tensor, i),
+                   reshape_before_fc_shape_tensor[x_num_col_dims]);
+          // If not set, test_trt_matmul_quant_dequant in trt 6015 will fail
+          reshape_before_fc_shape_tensor[x_num_col_dims]->setType(
+              nvinfer1::DataType::kINT32);
         }
-        reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
       }
+      filal_reshape_before_fc_shape_tensor =
+          Concat(reshape_before_fc_shape_tensor);
     }
+
     auto* reshape_before_fc_layer =
         TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *before_fc);
-    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    if (!engine_->with_dynamic_shape()) {
+      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    } else {
+      reshape_before_fc_layer->setInput(1,
+                                        *filal_reshape_before_fc_shape_tensor);
+    }
     reshape_before_fc_layer->setName(
         ("fc_op_reshape_before_fc: Shuffle (Output: " + output_name + ")")
             .c_str());
     return reshape_before_fc_layer;
   }

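For reference, the static-shape branch above can be reproduced in isolation. The sketch below is a hypothetical standalone helper, not part of the converter: it mirrors the padded-dims computation, where 0 means "copy this dimension from the input" in TensorRT shuffle semantics and all trailing dims collapse into the single "q" dim.

```cpp
#include <cstdio>
#include <vector>

// Pad an input of shape x_dim to x_num_col_dims + 3 dims ("* x q x 1 x 1"):
// dims before x_num_col_dims are kept (0 = copy from input), the rest are
// flattened into one, and two size-1 spatial dims pad the tail.
std::vector<int> PadDimsForFc(const std::vector<int>& x_dim,
                              int x_num_col_dims) {
  std::vector<int> out(x_num_col_dims + 3, 1);
  for (int i = 0; i < static_cast<int>(x_dim.size()); ++i) {
    if (i < x_num_col_dims) {
      out[i] = 0;  // keep this dimension as-is
    } else {
      out[x_num_col_dims] *= x_dim[i];  // flatten trailing dims into "q"
    }
  }
  return out;
}

int main() {
  auto d = PadDimsForFc({2, 3, 64}, 2);
  for (int v : d) std::printf("%d ", v);  // prints: 0 0 64 1 1
  std::printf("\n");
  return 0;
}
```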
   nvinfer1::ILayer* reshape_after_fc(nvinfer1::ITensor* after_fc,
-                                     nvinfer1::Dims x_dim, int x_num_col_dims) {
+                                     nvinfer1::Dims x_dim,
+                                     int x_num_col_dims) {
     // add shuffle after fc
     nvinfer1::Dims reshape_after_fc_dim;
     reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
-    for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
-      reshape_after_fc_dim.d[i] = 0;
+
+    nvinfer1::ITensor* filal_reshape_after_fc_shape_tensor = nullptr;
+    if (!engine_->with_dynamic_shape()) {
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
+    } else {
+      std::vector<int> gather_indices(x_num_col_dims + 1);
+      std::iota(gather_indices.begin(), gather_indices.end(), 0);
+      filal_reshape_after_fc_shape_tensor =
+          Gather(Shape(after_fc), gather_indices);
     }
+
     auto* reshape_after_fc_layer =
         TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *after_fc);
-    reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+    if (!engine_->with_dynamic_shape()) {
+      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+    } else {
+      reshape_after_fc_layer->setInput(1, *filal_reshape_after_fc_shape_tensor);
+    }
     return reshape_after_fc_layer;
   }

   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
+                  const framework::Scope& scope,
+                  bool test_mode) override {
     VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     auto output_name = op_desc.Output("Out").front();
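The `std::iota` call in the dynamic-shape branch of `reshape_after_fc` builds the index list [0, 1, ..., x_num_col_dims] used to gather the leading entries of the FC output shape, dropping the trailing 1 x 1 padding. A minimal sketch of that index construction, with an assumed `x_num_col_dims` of 2:

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  // Keep the first x_num_col_dims + 1 entries of the FC output shape;
  // std::iota fills the gather indices with 0, 1, 2, ...
  int x_num_col_dims = 2;
  std::vector<int> gather_indices(x_num_col_dims + 1);
  std::iota(gather_indices.begin(), gather_indices.end(), 0);
  for (int idx : gather_indices) std::printf("%d ", idx);  // prints: 0 1 2
  std::printf("\n");
  return 0;
}
```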
@@ -96,8 +142,9 @@ class FcOpConverter : public OpConverter {
     // Declare weights
     auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
     PADDLE_ENFORCE_NOT_NULL(
-        Y_v, platform::errors::NotFound(
-                 "Can not find %s presistale var of fc in scope.", w_name));
+        Y_v,
+        platform::errors::NotFound(
+            "Can not find %s presistale var of fc in scope.", w_name));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     int x_num_col_dims =
         op_desc.HasAttr("x_num_col_dims")
@@ -128,7 +175,8 @@ class FcOpConverter : public OpConverter {
     }
     weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

-    PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL,
+    PADDLE_ENFORCE_EQ(Y_t->dims().size(),
+                      2UL,
                       platform::errors::InvalidArgument(
                           "The fc's weight should be a matrix with 2 dims, but "
                           "it's %d-dimensional.",
@@ -143,25 +191,31 @@ class FcOpConverter : public OpConverter {
      }
    };

-    auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
+    auto regist_fc = [&](nvinfer1::ITensor* inputs,
+                         int n_output,
                          TensorRTEngine::Weight& weight,
                          TensorRTEngine::Weight& bias) {
      if (enable_int8 || support_int8) {
        // add conv layer
        float out_scale = 0;
        if (enable_int8) {
          PADDLE_ENFORCE_EQ(
-              op_desc.HasAttr("out_threshold"), true,
+              op_desc.HasAttr("out_threshold"),
+              true,
              platform::errors::InvalidArgument(
                  "must have out threshold in fc layers in int8 mode"));
          out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
        } else {
          out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Out"));
        }
        nvinfer1::DimsHW nv_ksize(1, 1);
-        auto* fc_layer_int8 =
-            TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
-                                 nv_ksize, weight.get(), bias.get());
+        auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
+                                                   Convolution,
+                                                   *inputs,
+                                                   n_output,
+                                                   nv_ksize,
+                                                   weight.get(),
+                                                   bias.get());
        fc_layer_int8->setName(
            ("fc_op_int8_conv1x1: Convolution (Output: " + output_name + ")")
                .c_str());
@@ -174,21 +228,29 @@ class FcOpConverter : public OpConverter {
                .c_str());
        engine_->SetTensorDynamicRange(fc_after_reshape_int8->getOutput(0),
                                       out_scale);
-          nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_after_reshape_int8->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_int8 =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_after_reshape_int8->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_int8,
+                                   "relu_after_fc_shuffle",
+                                   {output_name},
+                                   test_mode);
        } else {
          RreplenishLayerAndOutput(fc_after_reshape_int8,
                                   "fc_op_int8_reshape_after_fc: Shuffle",
-                                  {output_name}, test_mode);
+                                  {output_name},
+                                  test_mode);
        }
      } else {
        // add fc layer
-        auto* fc_layer_float =
-            TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
-                                 weight.get(), bias.get());
+        auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(engine_,
+                                                    FullyConnected,
+                                                    *inputs,
+                                                    n_output,
+                                                    weight.get(),
+                                                    bias.get());
        fc_layer_float->setName(
            ("fc_op_float: FullyConnected (Output: " + output_name + ")")
                .c_str());
@@ -198,14 +260,20 @@ class FcOpConverter : public OpConverter {
        fc_after_reshape_float->setName(
            ("float_reshape_after_fc: Shuffle (Output: " + output_name + ")")
                .c_str());
-          nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_after_reshape_float->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_float =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_after_reshape_float->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_float,
+                                   "relu_after_fc_shuffle",
+                                   {output_name},
+                                   test_mode);
        } else {
-          RreplenishLayerAndOutput(fc_after_reshape_float, "shuffle_after_fc",
-                                   {output_name}, test_mode);
+          RreplenishLayerAndOutput(fc_after_reshape_float,
+                                   "shuffle_after_fc",
+                                   {output_name},
+                                   test_mode);
        }
      }
    };
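Both int8 branches of `regist_fc` substitute a 1x1 Convolution for FullyConnected. Since `reshape_before_fc` pads the input to a [*, q, 1, 1] layout, a 1x1 conv with n_output filters over a 1x1 spatial map is arithmetically the same matrix product an FC layer computes. A plain-C++ sketch of that equivalence (illustrative names, no TensorRT API):

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// A 1x1 conv over a [q, 1, 1] input with n_output filters reduces to the
// matrix-vector product y = W * x, i.e. exactly an FC layer without bias.
std::vector<float> Conv1x1(const std::vector<std::vector<float>>& W,
                           const std::vector<float>& x) {
  std::vector<float> y(W.size(), 0.f);
  for (size_t o = 0; o < W.size(); ++o) {    // each output channel / filter
    assert(W[o].size() == x.size());
    for (size_t i = 0; i < x.size(); ++i) {  // 1x1 kernel, 1x1 spatial map
      y[o] += W[o][i] * x[i];                // reduces to a dot product
    }
  }
  return y;
}

int main() {
  // q = 3 input channels, n_output = 2 filters
  std::vector<std::vector<float>> W = {{1, 0, 2}, {0, 1, 1}};
  std::vector<float> x = {3, 4, 5};
  auto y = Conv1x1(W, x);
  std::printf("%g %g\n", y[0], y[1]);  // 13 9 -- same as the FC result W*x
  return 0;
}
```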
@@ -255,15 +323,20 @@ class FcOpConverter : public OpConverter {
      if (enable_int8 || support_int8) {
        // add conv1x1 layer
        nvinfer1::DimsHW nv_ksize(1, 1);
-        auto* fc_layer_int8 =
-            TRT_ENGINE_ADD_LAYER(engine_, Convolution, *X, n_output, nv_ksize,
-                                 weight.get(), bias.get());
+        auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
+                                                   Convolution,
+                                                   *X,
+                                                   n_output,
+                                                   nv_ksize,
+                                                   weight.get(),
+                                                   bias.get());
        if (activation_type == "relu") {
          fc_layer_int8->setName(
              ("ernie_fc_op_int8: Convolution (Output: " + output_name + ")")
                  .c_str());
          PADDLE_ENFORCE_EQ(
-              op_desc.HasAttr("out_threshold"), true,
+              op_desc.HasAttr("out_threshold"),
+              true,
              platform::errors::InvalidArgument(
                  "must have out threshold in fc layers in int8 mode"));
          float out_scale = 0;
@@ -275,15 +348,20 @@ class FcOpConverter : public OpConverter {
        }
        engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0),
                                       out_scale);
-          nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_layer_int8->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer_int8, "relu_after_ernie_fc_int8",
-                                   {output_name}, test_mode);
+          nvinfer1::IActivationLayer* relu_layer_int8 =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_layer_int8->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_int8,
+                                   "relu_after_ernie_fc_int8",
+                                   {output_name},
+                                   test_mode);
        } else {
          RreplenishLayerAndOutput(fc_layer_int8,
                                   "ernie_fc_op_int8: Convolution",
-                                  {output_name}, test_mode);
+                                  {output_name},
+                                  test_mode);
        }
      } else {
        // add fc layer
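The `out_scale` fed to `SetTensorDynamicRange` above fixes the int8 quantization grid for that tensor. A sketch of the symmetric-quantization arithmetic such a scale commonly implies (my illustration of the concept, not Paddle's or TensorRT's code; the engine applies this internally):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Symmetric int8 quantization: a dynamic range of [-out_scale, out_scale]
// maps onto the int8 grid [-127, 127]; values outside the range are clipped.
int8_t QuantizeSymmetric(float x, float out_scale) {
  float clipped = std::max(-out_scale, std::min(out_scale, x));
  return static_cast<int8_t>(std::lround(clipped / out_scale * 127.0f));
}

int main() {
  float out_scale = 6.0f;  // e.g. a calibrated out_threshold
  std::printf("%d %d %d\n",
              QuantizeSymmetric(3.0f, out_scale),    // 64
              QuantizeSymmetric(-6.0f, out_scale),   // -127
              QuantizeSymmetric(10.0f, out_scale));  // clipped to 127
  return 0;
}
```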
@@ -292,25 +370,30 @@ class FcOpConverter : public OpConverter {
      if (activation_type == "relu") {
        fc_layer_float->setName(
            ("ernie_fc_op_float: (Output: " + output_name + ")").c_str());
-          nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(fc_layer_float->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
+          nvinfer1::IActivationLayer* relu_layer_float =
+              TRT_ENGINE_ADD_LAYER(engine_,
+                                   Activation,
+                                   *(fc_layer_float->getOutput(0)),
+                                   nvinfer1::ActivationType::kRELU);
          RreplenishLayerAndOutput(relu_layer_float,
-                                   "relu_after_ernie_fc_float", {output_name},
+                                   "relu_after_ernie_fc_float",
+                                   {output_name},
                                   test_mode);
        } else {
-          RreplenishLayerAndOutput(fc_layer_float, "ernie_fc_op_float",
-                                   {output_name}, test_mode);
+          RreplenishLayerAndOutput(
+              fc_layer_float, "ernie_fc_op_float", {output_name}, test_mode);
        }
      }
    } else {  // need reshape input before and after fc
      PADDLE_ENFORCE_GT(
-          x_dim.nbDims, x_num_col_dims,
+          x_dim.nbDims,
+          x_num_col_dims,
          platform::errors::InvalidArgument(
              "Params and input dims mismatch. Paddle-TRT FC "
              "converter expects x_dim.nbDims > x_num_col_dims, but "
              "x_dim.nbDims : %d, x_num_col_dims : %d.",
-              x_dim.nbDims, x_num_col_dims));
+              x_dim.nbDims,
+              x_num_col_dims));
      auto* reshape_before_fc_layer =
          reshape_before_fc(X, x_dim, x_num_col_dims, output_name);
      auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
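Putting the pieces together, this branch runs reshape_before_fc, the FC (or conv1x1) layer, then reshape_after_fc. The shape-only walkthrough below traces the dims through the three stages, under assumed static shapes and example sizes (batch 2, seq 3, hidden 64, n_output 128); it is an illustration of the data flow, not the converter's actual code path:

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x_dim = {2, 3, 64};  // e.g. [batch, seq, hidden]
  int x_num_col_dims = 2;
  int n_output = 128;

  // reshape_before_fc: pad to x_num_col_dims + 3 dims -> [2, 3, 64, 1, 1]
  std::vector<int> before(x_num_col_dims + 3, 1);
  for (int i = 0; i < static_cast<int>(x_dim.size()); ++i) {
    if (i < x_num_col_dims) before[i] = x_dim[i];
    else before[x_num_col_dims] *= x_dim[i];
  }

  // FullyConnected replaces the flattened channel dim with n_output:
  // [2, 3, 64, 1, 1] -> [2, 3, 128, 1, 1]
  std::vector<int> after_fc = before;
  after_fc[x_num_col_dims] = n_output;

  // reshape_after_fc: keep the first x_num_col_dims + 1 dims -> [2, 3, 128]
  std::vector<int> out(after_fc.begin(),
                       after_fc.begin() + x_num_col_dims + 1);

  for (int d : out) std::printf("%d ", d);  // prints: 2 3 128
  std::printf("\n");
  return 0;
}
```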
