Skip to content

Commit f7c629d

Browse files
authored
Revert "CHERRY_PICK: TRT int8: refine trt int8 for dynamic range set (#21112) (#21449)" (#21619)
This reverts commit 0473cdb.
1 parent d0943db commit f7c629d

File tree

13 files changed

+36
-138
lines changed

13 files changed

+36
-138
lines changed

paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc

Lines changed: 0 additions & 21 deletions
Original file line number · Diff line number · Diff line change
@@ -39,7 +39,6 @@ void DeleteQuantDequantOpPass::ApplyImpl(ir::Graph* graph) const {
3939
patterns::DeleteQuantDequantOpPattern pattern(gpd.mutable_pattern(),
4040
pattern_name);
4141
pattern();
42-
auto* scope = param_scope();
4342

4443
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
4544
Graph* g) {
@@ -48,29 +47,10 @@ void DeleteQuantDequantOpPass::ApplyImpl(ir::Graph* graph) const {
4847
std::string any_op_out_name = any_op_out->Var()->Name();
4948
std::string quant_dequant_op_out_name = quant_dequant_op_out->Var()->Name();
5049

51-
std::string input_scale_var_name =
52-
quant_dequant_op->Op()->Input("InScale").front();
53-
const LoDTensor& input_scale_tensor =
54-
scope->FindVar(input_scale_var_name)->Get<LoDTensor>();
55-
56-
const float* input_scale_data = input_scale_tensor.data<float>();
57-
float input_scale = input_scale_data[0];
5850
auto* any_op2_desc = any_op2->Op();
5951
// auto input_args_names = any_op2_desc->InputArgumentNames();
6052
auto var_map = any_op2_desc->Inputs();
61-
std::string arg_name = "";
62-
for (auto& name_m : var_map) {
63-
if (std::find(name_m.second.begin(), name_m.second.end(),
64-
quant_dequant_op_out_name) != name_m.second.end()) {
65-
arg_name = name_m.first;
66-
}
67-
}
68-
CHECK(arg_name.size() > 0) << "can not find the input "
69-
<< quant_dequant_op_out_name;
70-
any_op2_desc->SetAttr("enable_int8", true);
71-
any_op2_desc->SetAttr(arg_name + "_scale", input_scale);
7253

73-
// modify the any_op2's inputs
7454
for (auto& name_m : var_map) {
7555
if (std::find(name_m.second.begin(), name_m.second.end(),
7656
quant_dequant_op_out_name) != name_m.second.end()) {
@@ -85,7 +65,6 @@ void DeleteQuantDequantOpPass::ApplyImpl(ir::Graph* graph) const {
8565
any_op2_desc->Flush();
8666
}
8767
}
88-
any_op2_desc->Flush();
8968
// Delete the unneeded nodes.
9069
GraphSafeRemoveNodes(graph,
9170
{quant_dequant_op, quant_dequant_op_out,

paddle/fluid/framework/ir/fc_fuse_pass.cc

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -99,7 +99,7 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
9999
auto* mul_op_desc = mul->Op();
100100
if (mul_op_desc->HasAttr("enable_int8")) {
101101
desc.SetAttr("enable_int8", mul_op_desc->GetAttr("enable_int8"));
102-
desc.SetAttr("Input_scale", mul_op_desc->GetAttr("X_scale"));
102+
desc.SetAttr("input_scale", mul_op_desc->GetAttr("input_scale"));
103103
desc.SetAttr("weight_scale", mul_op_desc->GetAttr("weight_scale"));
104104
if (mul_op_desc->HasAttr("out_scale"))
105105
desc.SetAttr("out_scale", mul_op_desc->GetAttr("out_scale"));

paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc

Lines changed: 2 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -140,24 +140,22 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
140140

141141
framework::OpDesc new_op_desc(base_op_desc, nullptr);
142142
new_op_desc.SetType(quantized_op_type);
143-
new_op_desc.SetAttr("enable_int8", true);
144143

145144
if (quantized_op_type == "conv2d" ||
146145
quantized_op_type == "conv2d_fusion" ||
147146
quantized_op_type == "depthwise_conv2d") {
148147
new_op_desc.SetInput("Input", {new_input});
149-
new_op_desc.SetAttr("Input_scale", input_scale);
150148
new_op_desc.SetOutput("Output", {new_output});
151149
} else if (quantized_op_type == "fc") {
152150
new_op_desc.SetInput("Input", {new_input});
153-
new_op_desc.SetAttr("Input_scale", input_scale);
154151
new_op_desc.SetOutput("Out", {new_output});
155152
} else if (quantized_op_type == "mul") {
156153
new_op_desc.SetInput("X", {new_input});
157-
new_op_desc.SetAttr("X_scale", input_scale);
158154
new_op_desc.SetOutput("Out", {new_output});
159155
}
160156

157+
new_op_desc.SetAttr("enable_int8", true);
158+
new_op_desc.SetAttr("input_scale", input_scale);
161159
new_op_desc.SetAttr("weight_scale", weight_scale);
162160
new_op_desc.Flush();
163161
auto* new_op = graph->CreateOpNode(&new_op_desc);

paddle/fluid/inference/api/paddle_pass_builder.cc

Lines changed: 3 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -76,10 +76,9 @@ const std::vector<std::string> kTRTSubgraphPasses({
7676
"shuffle_channel_detect_pass", //
7777
"quant_conv2d_dequant_fuse_pass", //
7878
"delete_quant_dequant_op_pass", //
79-
"conv_bn_fuse_pass", //
80-
"fc_fuse_pass", //
81-
"tensorrt_subgraph_pass", //
82-
"conv_bn_fuse_pass", //
79+
// "fc_fuse_pass", //
80+
"tensorrt_subgraph_pass", //
81+
"conv_bn_fuse_pass", //
8382
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
8483
// guaranteed at least v7
8584
"conv_elementwise_add_act_fuse_pass", //

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

Lines changed: 8 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -40,8 +40,7 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
4040

4141
if (enable_int8) {
4242
#if IS_TRT_VERSION_GE(5000)
43-
CHECK(op_desc.HasAttr("Input_scale"));
44-
float in_scale = boost::get<float>(op_desc.GetAttr("Input_scale"));
43+
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
4544
auto weight_scale =
4645
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
4746
weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t,
@@ -90,6 +89,13 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
9089
layer->getOutput(0)->setName(output_name.c_str());
9190
engine->SetITensor(output_name, layer->getOutput(0));
9291

92+
#if IS_TRT_VERSION_GE(5000)
93+
if (enable_int8) {
94+
float output_scale = boost::get<float>(op_desc.GetAttr("out_scale"));
95+
engine->SetTensorDynamicRange(layer->getOutput(0), output_scale);
96+
}
97+
#endif
98+
9399
if (test_mode) {
94100
engine->DeclareOutput(output_name);
95101
}

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

Lines changed: 6 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -110,11 +110,10 @@ class ElementwiseWeightOpConverter : public OpConverter {
110110
auto output_name = op_desc.Output("Out")[0];
111111
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
112112
test_mode);
113-
if (op_desc.HasAttr("enable_int8")) {
113+
if (op_desc.HasAttr("out_scale")) {
114114
#if IS_TRT_VERSION_GE(5000)
115-
CHECK(op_desc.HasAttr("X_scale"));
116-
float x_scale = boost::get<float>(op_desc.GetAttr("X_scale"));
117-
engine_->SetTensorDynamicRange(X, x_scale);
115+
float out_scale = boost::get<float>(op_desc.GetAttr("out_scale"));
116+
engine_->SetTensorDynamicRange(layer->getOutput(0), out_scale);
118117
#endif
119118
}
120119
}
@@ -170,14 +169,10 @@ class ElementwiseTensorOpConverter : public OpConverter {
170169
layer = plugin_layer;
171170
}
172171
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
173-
if (op_desc.HasAttr("enable_int8")) {
172+
if (op_desc.HasAttr("out_scale")) {
174173
#if IS_TRT_VERSION_GE(5000)
175-
CHECK(op_desc.HasAttr("X_scale"));
176-
CHECK(op_desc.HasAttr("Y_scale"));
177-
float x_scale = boost::get<float>(op_desc.GetAttr("X_scale"));
178-
float y_scale = boost::get<float>(op_desc.GetAttr("Y_scale"));
179-
engine_->SetTensorDynamicRange(X, x_scale);
180-
engine_->SetTensorDynamicRange(Y, y_scale);
174+
float out_scale = boost::get<float>(op_desc.GetAttr("out_scale"));
175+
engine_->SetTensorDynamicRange(layer->getOutput(0), out_scale);
181176
#endif
182177
}
183178
}

paddle/fluid/inference/tensorrt/convert/fc_op.cc

Lines changed: 7 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -77,8 +77,7 @@ class FcOpConverter : public OpConverter {
7777
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
7878
if (enable_int8) {
7979
#if IS_TRT_VERSION_GE(5000)
80-
CHECK(op_desc.HasAttr(i_name + "_scale"));
81-
float in_scale = boost::get<float>(op_desc.GetAttr(i_name + "_scale"));
80+
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
8281
auto weight_scale =
8382
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
8483
weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(),
@@ -136,6 +135,12 @@ class FcOpConverter : public OpConverter {
136135
auto output_name = op_desc.Output("Out").front();
137136

138137
RreplenishLayerAndOutput(layer, "fc", {output_name}, test_mode);
138+
if (enable_int8) {
139+
#if IS_TRT_VERSION_GE(5000)
140+
float out_scale = boost::get<float>(op_desc.GetAttr("out_scale"));
141+
engine_->SetTensorDynamicRange(layer->getOutput(0), out_scale);
142+
#endif
143+
}
139144
}
140145
};
141146

paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc

Lines changed: 0 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -42,13 +42,6 @@ class LeakyReluOpConverter : public OpConverter {
4242
engine_, Activation, *input, nvinfer1::ActivationType::kLEAKY_RELU);
4343
layer->setAlpha(alpha);
4444
output_layer = layer;
45-
46-
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
47-
if (enable_int8) {
48-
CHECK(op_desc.HasAttr("X_scale"));
49-
float in_scale = boost::get<float>(op_desc.GetAttr("X_scale"));
50-
engine_->SetTensorDynamicRange(input, in_scale);
51-
}
5245
#else
5346
platform::CPUPlace place;
5447
std::unique_ptr<framework::LoDTensor> alpha_tensor(

paddle/fluid/inference/tensorrt/convert/pool2d_op.cc

Lines changed: 3 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -160,11 +160,10 @@ class Pool2dOpConverter : public OpConverter {
160160
auto output_name = op_desc.Output("Out")[0];
161161
RreplenishLayerAndOutput(layer, "pool2d", {output_name}, test_mode);
162162

163-
if (op_desc.HasAttr("enable_int8")) {
163+
if (op_desc.HasAttr("out_scale")) {
164164
#if IS_TRT_VERSION_GE(5000)
165-
CHECK(op_desc.HasAttr("X_scale"));
166-
float input_scale = boost::get<float>(op_desc.GetAttr("X_scale"));
167-
engine_->SetTensorDynamicRange(input1, input_scale);
165+
float out_scale = boost::get<float>(op_desc.GetAttr("out_scale"));
166+
engine_->SetTensorDynamicRange(layer->getOutput(0), out_scale);
168167
#endif
169168
}
170169
}

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 6 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -104,31 +104,12 @@ void TensorRTEngine::FreezeNetwork() {
104104

105105
for (auto &t : all_t) {
106106
if (!quant_dynamic_range_.count(t)) {
107-
VLOG(3)
107+
LOG(WARNING)
108108
<< "We are in trt int8 mode(not calibration), scale not setted"
109109
<< " for tensor " << t->getName()
110110
<< ", this might be ok when trt does not need this range";
111111
}
112112
}
113-
std::unordered_set<std::string> all_out_t_name;
114-
for (int i = 0; i < infer_network_->getNbOutputs(); i++) {
115-
auto *temp = infer_network_->getOutput(i);
116-
temp->setDynamicRange(-1, 1);
117-
all_out_t_name.insert(temp->getName());
118-
}
119-
120-
for (int i = 0; i < infer_network_->getNbLayers(); i++) {
121-
auto layer = infer_network_->getLayer(i);
122-
for (int j = 0; j < layer->getNbOutputs(); j++) {
123-
auto *temp_out = layer->getOutput(j);
124-
if (std::find(all_out_t_name.begin(), all_out_t_name.end(),
125-
temp_out->getName()) != all_out_t_name.end()) {
126-
layer->setPrecision(nvinfer1::DataType::kFLOAT);
127-
layer->setOutputType(j, nvinfer1::DataType::kFLOAT);
128-
}
129-
}
130-
}
131-
132113
#endif
133114
}
134115
}
@@ -234,6 +215,11 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
234215
(scale.size() == 1 || scale.size() == static_cast<size_t>(w_dims[0]));
235216
PADDLE_ENFORCE(valid_scale_size, "TRT int8 quant: invalid scale size");
236217
for (int i = 0; i < weight_tensor->numel(); i++) {
218+
bool is_valid_int8 =
219+
((weight_data[i] >= -128) && (weight_data[i] <= 127));
220+
PADDLE_ENFORCE(is_valid_int8,
221+
"We are in anakin subgraph int8 mode, the weight of conv "
222+
"should be in range [-128, 127]");
237223
if (scale.size() == 1) {
238224
weight_data[i] *= (scale[0] / 127);
239225
} else {

0 commit comments

Comments (0)