
Commit 9a3d859

cherry-pick: Align the code of trt under the develop and release/2.1 branch (#33631)
1 parent bd3aa03 commit 9a3d859

5 files changed: +187 −201 lines

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

Lines changed: 30 additions & 1 deletion
@@ -62,6 +62,25 @@ class ElementwiseWeightOpConverter : public OpConverter {
                                          0};
     TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                          0};
+
+    nvinfer1::IShuffleLayer* expand_layer = nullptr;
+    nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
+    int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
+    auto input_dim = X->getDimensions();
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = 3 + dynamic_shape_offset;
+      for (int i = 0; i < expand_shape.nbDims; i++) {
+        if (i < input_dim.nbDims) {
+          expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+        } else {
+          expand_shape.d[i] = 1;
+        }
+      }
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+      expand_layer->setReshapeDimensions(expand_shape);
+      X = expand_layer->getOutput(0);
+    }
     if (op_type_ == "add") {
       nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
           engine_, Scale, *X, scale_mode, shift_weights.get(),
@@ -73,7 +92,17 @@ class ElementwiseWeightOpConverter : public OpConverter {
               shift_weights.get(), power_weights.get());
       layer = scale_layer;
     }
-
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims squeeze_shape;
+      squeeze_shape.nbDims = input_dim.nbDims;
+      for (int i = 0; i < squeeze_shape.nbDims; i++) {
+        squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+      }
+      squeeze_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
+      squeeze_layer->setReshapeDimensions(squeeze_shape);
+      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+    }
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
                              test_mode);
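Note on the hunks above: TensorRT's IScaleLayer only accepts inputs of sufficient rank (at least 3 non-batch dimensions, one more when the batch dimension is explicit under dynamic shape), so the converter now pads low-rank inputs with trailing 1s before scaling and reshapes the result back afterwards. Below is a minimal sketch of the same expand/squeeze idiom against the raw TensorRT API; the function name and parameters are illustrative, not part of this commit, and it assumes an already-built network definition.

#include <NvInfer.h>

// Sketch: expand a low-rank tensor so IScaleLayer accepts it, scale it,
// then restore the original rank. In shuffle reshape dims, 0 means "copy
// the corresponding dimension from the input", which keeps dynamic dims
// intact (a literal -1 would instead mean "infer this dimension").
nvinfer1::ITensor* ScaleWithRankFixup(nvinfer1::INetworkDefinition* network,
                                      nvinfer1::ITensor* x,
                                      nvinfer1::ScaleMode mode,
                                      const nvinfer1::Weights& shift,
                                      const nvinfer1::Weights& scale,
                                      const nvinfer1::Weights& power,
                                      int min_rank) {
  const nvinfer1::Dims in_dims = x->getDimensions();
  const bool needs_fixup = in_dims.nbDims < min_rank;
  if (needs_fixup) {
    nvinfer1::Dims expand{};
    expand.nbDims = min_rank;
    for (int i = 0; i < min_rank; i++) {
      expand.d[i] = (i < in_dims.nbDims) ? 0 : 1;  // pad with trailing 1s
    }
    auto* expander = network->addShuffle(*x);
    expander->setReshapeDimensions(expand);
    x = expander->getOutput(0);
  }
  auto* scaled = network->addScale(*x, mode, shift, scale, power);
  nvinfer1::ITensor* out = scaled->getOutput(0);
  if (needs_fixup) {
    nvinfer1::Dims squeeze = in_dims;
    for (int i = 0; i < squeeze.nbDims; i++) {
      if (squeeze.d[i] < 0) squeeze.d[i] = 0;  // dynamic dim: copy from input
    }
    auto* squeezer = network->addShuffle(*out);
    squeezer->setReshapeDimensions(squeeze);  // back to the original rank
    out = squeezer->getOutput(0);
  }
  return out;
}

This mirrors why the hunk maps negative (dynamic) extents to 0 in both expand_shape and squeeze_shape: 0 safely forwards whatever the runtime shape turns out to be.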

paddle/fluid/inference/tensorrt/convert/fc_op.cc

Lines changed: 86 additions & 161 deletions
@@ -37,7 +37,7 @@ class FcOpConverter : public OpConverter {
                   const framework::Scope& scope, bool test_mode) override {
     VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
     framework::OpDesc op_desc(op, nullptr);
-
+    auto output_name = op_desc.Output("Out").front();
     auto input_names = op_desc.InputNames();
     bool with_bias = input_names.size() >= 3;
     std::string w_name = "Y";
@@ -48,13 +48,14 @@ class FcOpConverter : public OpConverter {
     }
     // Declare inputs
     auto* X = engine_->GetITensor(op_desc.Input(i_name).front());
+    auto x_dim = X->getDimensions();
     // Declare weights
     auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
     PADDLE_ENFORCE_NOT_NULL(
         Y_v, platform::errors::NotFound(
                  "Can not find %s presistale var of fc in scope.", w_name));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-    const int x_num_col_dims =
+    int x_num_col_dims =
         op_desc.HasAttr("x_num_col_dims")
             ? BOOST_GET_CONST(int, op_desc.GetAttr("x_num_col_dims"))
             : (op_desc.HasAttr("in_num_col_dims")
@@ -106,31 +107,61 @@ class FcOpConverter : public OpConverter {
     auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
                          TensorRTEngine::Weight& weight,
                          TensorRTEngine::Weight& bias) {
-      nvinfer1::ILayer* fc_layer = nullptr;
       if (enable_int8) {
+        // add conv layer
         PADDLE_ENFORCE_EQ(
             op_desc.HasAttr("out_threshold"), true,
             platform::errors::InvalidArgument(
                 "must have out threshold in fc layers in int8 mode"));
         float out_scale =
             BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
         nvinfer1::DimsHW nv_ksize(1, 1);
-        fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
-                                        nv_ksize, weight.get(), bias.get());
-        engine_->SetTensorDynamicRange(fc_layer->getOutput(0), out_scale);
-      } else {
-        fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs,
-                                        n_output, weight.get(), bias.get());
-      }
-
-      auto output_name = op_desc.Output("Out").front();
-      if (activation_type == "relu") {
-        nvinfer1::IActivationLayer* relu_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Activation, *(fc_layer->getOutput(0)),
-                                 nvinfer1::ActivationType::kRELU);
-        RreplenishLayerAndOutput(relu_layer, "fc", {output_name}, test_mode);
+        auto* fc_layer_int8 =
+            TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
+                                 nv_ksize, weight.get(), bias.get());
+        engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0), out_scale);
+        if (activation_type == "relu") {
+          nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
+              engine_, Activation, *(fc_layer_int8->getOutput(0)),
+              nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
+                                   {output_name}, test_mode);
+        } else {
+          RreplenishLayerAndOutput(fc_layer_int8, "shuffle_after_fc",
+                                   {output_name}, test_mode);
+        }
       } else {
-        RreplenishLayerAndOutput(fc_layer, "fc", {output_name}, test_mode);
+        // add fc layer
+        auto* fc_layer_before =
+            TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
+                                 weight.get(), bias.get());
+        fc_layer_before->setName(
+            ("fc_layer_before(Output: " + output_name + ")").c_str());
+        // add shuffle after fc
+        nvinfer1::Dims reshape_after_fc_dim;
+        if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
+            x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 1) {
+          // If use tensorrt'oss, the x_dim and x_num_col_dims need change
+          reshape_after_fc_dim.nbDims = 4;
+        } else {
+          reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+        }
+        for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+          reshape_after_fc_dim.d[i] = 0;
+        }
+        auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(
+            engine_, Shuffle, *fc_layer_before->getOutput(0));
+        fc_layer_float->setReshapeDimensions(reshape_after_fc_dim);
+        if (activation_type == "relu") {
+          nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
+              engine_, Activation, *(fc_layer_float->getOutput(0)),
+              nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
+                                   {output_name}, test_mode);
+        } else {
+          RreplenishLayerAndOutput(fc_layer_float, "shuffle_after_fc",
+                                   {output_name}, test_mode);
+        }
       }
     };
 
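A brief note on the regist_fc rewrite above: both branches now name their layers and attach the activation themselves, and the float path appends a shuffle because IFullyConnectedLayer returns its result with trailing singleton dimensions (shape [..., K, 1, 1]). The reshape dims are all zeros with rank x_num_col_dims + 1; since a 0 copies the corresponding input dimension and the dropped trailing dims have volume 1, the padding is squeezed away. A minimal sketch of that squeeze, under the same illustrative setup as the previous note (SqueezeFcOutput and k are made-up names):

// Sketch: drop the trailing 1x1 that a FullyConnected layer leaves on its
// output. `k` plays the role of x_num_col_dims: the first k dims are kept
// as-is (0 = copy from input) and dim k becomes the output-channel count.
nvinfer1::ITensor* SqueezeFcOutput(nvinfer1::INetworkDefinition* network,
                                   nvinfer1::ITensor* fc_out, int k) {
  nvinfer1::Dims shape{};
  shape.nbDims = k + 1;
  for (int i = 0; i < shape.nbDims; i++) {
    shape.d[i] = 0;  // copy dim i from the input tensor
  }
  auto* shuffle = network->addShuffle(*fc_out);
  shuffle->setReshapeDimensions(shape);  // trailing 1x1 dims are squeezed away
  return shuffle->getOutput(0);
}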
@@ -157,153 +188,47 @@ class FcOpConverter : public OpConverter {
                                   static_cast<void*>(bias_data),
                                   static_cast<size_t>(bias_num)};
 
-    if (engine_->with_dynamic_shape()) {
-      // not NCHW layout, but NLP layout with added 'x 1 x 1'
-      auto x_dim = X->getDimensions();
-      if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
-          x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) {
-        // fc which is just after self attention
-        regist_fc(X, n_output, weight, bias);
-        return;
-      }
-      PADDLE_ENFORCE_LE(
-          x_dim.nbDims - x_num_col_dims, 3,
-          platform::errors::InvalidArgument(
-              "Params and input dims mismatch. Paddle-TRT FC "
-              "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
-              "x_dim.nbDims = %d, x_num_col_dims = %d.",
-              x_dim.nbDims, x_num_col_dims));
-      auto output_name = op_desc.Output("Out").front();
-      // add shuffle before fc
-      nvinfer1::Dims reshape_before_fc_dim;
-      // padding shape "x 1 x 1"
-      int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
-      reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
-      int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
-      while (padding_length-- > 0) {
-        reshape_before_fc_dim.d[cur_dim_index--] = 1;
-      }
-      while (cur_dim_index >= 0) {
-        reshape_before_fc_dim.d[cur_dim_index--] = 0;
-      }
-
-      auto* reshape_before_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-      reshape_before_fc_layer->setName(
-          ("shuffle_before_fc(Output: " + output_name + ")").c_str());
-
-      // add fc layer
-      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
-          n_output, weight.get(), bias.get());
-      fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
-
-      // add shuffle after fc
-      nvinfer1::Dims reshape_after_fc_dim;
-      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
-      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
-        reshape_after_fc_dim.d[i] = 0;
-      }
-
-      auto* reshape_after_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
-      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
-
-      if (activation_type == "relu") {
-        reshape_after_fc_layer->setName(
-            ("shuffle_after_fc(Output: " + output_name + ")").c_str());
-        nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
-            nvinfer1::ActivationType::kRELU);
-        RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
-                                 {output_name}, test_mode);
-      } else {
-        RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
-                                 {output_name}, test_mode);
-      }
-      return;
+    // Running the TRT Static Shape mode: x_num_col_dims-1
+    if (!engine_->with_dynamic_shape()) {
+      x_num_col_dims--;
     }
-    // in order to handle situations in NLP models(input dims < 3,
-    // x_num_col_dims != 1, etc.), reshape input to perform FC correctly.
-    auto* reshape_itensor = X;
-    int input_dims = X->getDimensions().nbDims;
-    auto input_d = X->getDimensions().d;
-    int reshape_dim3[3] = {0};
-    int reshape_dim4[4] = {0};
-    PADDLE_ENFORCE_LE(x_num_col_dims, input_dims,
-                      platform::errors::InvalidArgument(
-                          "Params and input dims mismatch. Paddle-TRT FC "
-                          "converter expects x_num_col_dims <= input dims"));
-    if (x_num_col_dims == 1) {
-      if (input_dims == 4) {
-        PADDLE_ENFORCE_EQ(
-            input_d[3], 1,
-            platform::errors::InvalidArgument(
-                "Invalid dimensions. When x_num_col_dims equals to 1 and input "
-                "dims equals to 4, the last dim of input must be 1, but got %d",
-                input_d[3]));
-      }
-      if (enable_int8) {
-        reshape_dim3[0] = 1;
-        for (int i = 0; i < 3; i++) {
-          reshape_dim3[0] *= input_d[i];
-          if (i > 0) {
-            reshape_dim3[i] = 1;
-          }
-        }
-      } else {
-        for (int i = 0; i < 3; i++) {
-          if (i < input_dims) {
-            reshape_dim3[i] = input_d[i];
-          } else {
-            reshape_dim3[i] = 1;
-          }
-        }
-      }
-
-      nvinfer1::Dims3 reshape_dim(reshape_dim3[0], reshape_dim3[1],
-                                  reshape_dim3[2]);
-      auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-      reshape_layer->setReshapeDimensions(reshape_dim);
-      reshape_itensor = reshape_layer->getOutput(0);
-      if (enable_int8) {
-        engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
-      }
-    } else {
-      PADDLE_ENFORCE_NE(input_dims, 1,
-                        platform::errors::InvalidArgument(
-                            "Invalid dimensions. When x_num_col_dims equals to "
-                            "2, input_dims should not be 1"));
-
-      if (enable_int8) {
-        for (int i = 0; i < 4; i++) {
-          if (i == 0) {
-            reshape_dim4[i] = input_d[i];
-          } else {
-            reshape_dim4[i] = 1;
-            if (i < input_dims) {
-              reshape_dim4[1] *= input_d[i];
-            }
-          }
-        }
+    // If use tensorrt'oss, the x_dim and x_num_col_dims need change
+    if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
+        x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) {
+      x_num_col_dims = 1;
+    }
+    PADDLE_ENFORCE_GT(
+        x_dim.nbDims, x_num_col_dims,
+        platform::errors::InvalidArgument(
+            "Params and input dims mismatch. Paddle-TRT FC "
+            "converter expects x_dim.nbDims > x_num_col_dims, but "
+            "x_dim.nbDims : %d, x_num_col_dims : %d.",
+            x_dim.nbDims, x_num_col_dims));
+    // add shuffle before fc
+    nvinfer1::Dims reshape_before_fc_dim;
+    reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
+    // padding shape "* x q x 1 x 1"
+    for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+      reshape_before_fc_dim.d[i] = 1;
+    }
+    for (int i = 0; i < x_dim.nbDims; i++) {
+      if (i < x_num_col_dims) {
+        reshape_before_fc_dim.d[i] = 0;
       } else {
-        for (int i = 0; i < 4; i++) {
-          if (i < input_dims) {
-            reshape_dim4[i] = input_d[i];
-          } else {
-            reshape_dim4[i] = 1;
-          }
+        if (x_dim.d[i] < 0) {
+          reshape_before_fc_dim.d[x_num_col_dims] = -1;
+          break;
         }
+        reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
       }
-      nvinfer1::Dims4 reshape_dim(reshape_dim4[0], reshape_dim4[1],
-                                  reshape_dim4[2], reshape_dim4[3]);
-      auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-      reshape_layer->setReshapeDimensions(reshape_dim);
-      reshape_itensor = reshape_layer->getOutput(0);
-      if (enable_int8) {
-        engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
-      }
+    }
+    auto* reshape_before_fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    reshape_before_fc_layer->setName(
+        ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+    auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
+    if (enable_int8) {
+      engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
     }
     regist_fc(reshape_itensor, n_output, weight, bias);
   }
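The net effect of this last hunk is that the static and dynamic shape paths now share a single reshape-before-fc: dimensions below x_num_col_dims are forwarded unchanged, every dimension from x_num_col_dims onward is flattened into one extent q, and two 1s are appended so the FC input reads "* x q x 1 x 1". If any flattened dimension is dynamic, q is set to -1 and TensorRT infers it. A standalone sketch of just that dimension computation, assuming nothing beyond NvInfer.h (ReshapeBeforeFcDims is an illustrative name, not from the commit):

// Sketch: build the "* x q x 1 x 1" reshape applied before the FC layer,
// where `k` plays the role of x_num_col_dims.
nvinfer1::Dims ReshapeBeforeFcDims(const nvinfer1::Dims& x_dim, int k) {
  nvinfer1::Dims shape{};
  shape.nbDims = k + 3;  // k kept dims + flattened q + two padding 1s
  for (int i = 0; i < shape.nbDims; i++) {
    shape.d[i] = 1;  // default: padding dims of extent 1
  }
  for (int i = 0; i < x_dim.nbDims; i++) {
    if (i < k) {
      shape.d[i] = 0;  // 0 = copy this dim from the input
    } else if (x_dim.d[i] < 0) {
      shape.d[k] = -1;  // a dynamic dim forces q to be inferred
      break;
    } else {
      shape.d[k] *= x_dim.d[i];  // flatten remaining static dims into q
    }
  }
  return shape;
}

For example, a static input of shape (8, 128, 768) with x_num_col_dims = 2 yields (0, 0, 768, 1, 1), while (8, -1, 768) with x_num_col_dims = 1 yields (0, -1, 1, 1).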

0 commit comments