Skip to content

Commit 65b61db

Browse files
authored
Merge pull request #13927 from NHZlX/fix_googlenet_bug_with_rule
Fix googlenet bug with rule
2 parents ea8984c + 5700faf commit 65b61db

File tree

5 files changed

+50
-8
lines changed

5 files changed

+50
-8
lines changed

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,21 @@ namespace paddle {
1818
namespace inference {
1919
namespace tensorrt {
2020

21+
// Workaround for a TensorRT layer-merging bug hit by GoogleNet-style graphs:
// when more than one 1x1 conv (stride 1, padding 0) consumes the same input
// tensor, the merging optimization fuses them into one conv and produces
// wrong results.  We count, per input tensor, how many such convs have been
// seen; once a second conv references an already-counted input we report
// "skip" so the caller declares the output and blocks the fusion.
//
// engine_    : engine whose itensor_quote_num bookkeeping map is consulted
//              and updated.
// filters    : {filter_h, filter_w} of the conv being converted (size >= 2).
// strides    : conv strides {h, w} (size >= 2).
// paddings   : conv paddings {h, w} (size >= 2).
// input_name : name of the conv's input ITensor.
//
// Returns true when the merging optimization must be skipped for this conv.
bool to_skip_merging_optimize(TensorRTEngine* engine_,
                              const std::vector<int>& filters,
                              const std::vector<int>& strides,
                              const std::vector<int>& paddings,
                              const std::string& input_name) {
  // Some earlier 1x1 conv already consumes this input: skip merging now.
  if (engine_->itensor_quote_num[input_name] > 0) {
    return true;
  }
  // Record that a 1x1 / stride-1 / padding-0 conv references this input so
  // that the next conv sharing it triggers the early return above.
  if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&
      strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)
    engine_->itensor_quote_num[input_name] += 1;

  return false;
}
2136
class Conv2dOpConverter : public OpConverter {
2237
public:
2338
void operator()(const framework::proto::OpDesc& op,
@@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter {
3146
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
3247

3348
auto* X = engine_->GetITensor(op_desc.Input("Input").front());
49+
3450
// Declare weights
3551
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
3652
PADDLE_ENFORCE_NOT_NULL(Y_v);
@@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter {
8399
std::move(weight_tensor);
84100
layer->getOutput(0)->setName(output_name.c_str());
85101
engine_->SetITensor(output_name, layer->getOutput(0));
86-
if (test_mode) {
102+
103+
if (test_mode ||
104+
to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
105+
paddings, op_desc.Input("Input").front())) {
87106
engine_->DeclareOutput(output_name);
88107
}
89108
}

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
133133
buffer_sizes_[name] = 0;
134134
}
135135

136+
bool TensorRTEngine::HasDeclared(const std::string &name) {
137+
return buffer_sizes_.count(name) > 0;
138+
}
139+
136140
void TensorRTEngine::DeclareOutput(const std::string &name) {
137141
PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
138142
name);

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase {
9191
const std::string& name);
9292
// Set the itensor_map_[name] as the network's output, and set its name.
9393
void DeclareOutput(const std::string& name);
94+
// Check if the ITensor has been declared
95+
bool HasDeclared(const std::string& name);
9496

9597
// GPU memory address for an ITensor with specific name. One can operate on
9698
// these memory directly for acceleration, for example, output the converted
@@ -132,6 +134,16 @@ class TensorRTEngine : public EngineBase {
132134
std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
133135
weight_map;
134136

137+
// TODO(NHZlX): paddle-trt currently has a bug when running GoogleNet: when
// more than two 1x1 convolutions share the same input, paddle-tensorrt
// performs a merging optimization that fuses those convs into one conv,
// which triggers the bug.  This counter is used as a strategy to avoid that
// optimization for the time being; the bug will be fixed in the future.
144+
std::unordered_map<std::string /*name*/, int /*ITensor_quote_num*/>
145+
itensor_quote_num;
146+
135147
private:
136148
// the max batch size
137149
int max_batch_;

paddle/fluid/inference/tests/api/trt_models_tester.cc

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
9393
}
9494
}
9595

96-
TEST(trt_models_test, main) {
97-
std::vector<std::string> infer_models = {"mobilenet", "resnet50",
98-
"resnext50"};
99-
for (auto &model_dir : infer_models) {
100-
CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
101-
}
96+
// One test case per model (instead of a single TEST looping over a list) so
// a failure in one model does not mask the results of the others.
TEST(trt_models_test, mobilenet) {
  const std::string model_dir = FLAGS_dirname + "/mobilenet";
  CompareTensorRTWithFluid(1, model_dir);
}

TEST(trt_models_test, resnet50) {
  const std::string model_dir = FLAGS_dirname + "/resnet50";
  CompareTensorRTWithFluid(1, model_dir);
}

TEST(trt_models_test, resnext50) {
  const std::string model_dir = FLAGS_dirname + "/resnext50";
  CompareTensorRTWithFluid(1, model_dir);
}
107+
103108
} // namespace paddle

paddle/fluid/operators/tensorrt_engine_op.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
223223

224224
// Add outputs
225225
for (auto& output : output_maps) {
226-
engine->DeclareOutput(output);
226+
if (!engine->HasDeclared(output)) {
227+
engine->DeclareOutput(output);
228+
}
227229
}
228230

229231
engine->FreezeNetwork();

0 commit comments

Comments
 (0)