@@ -18,92 +18,139 @@ namespace paddle {
18
18
namespace inference {
19
19
namespace tensorrt {
20
20
21
- bool to_skip_merging_optimize (TensorRTEngine* engine_ ,
21
+ bool to_skip_merging_optimize (TensorRTEngine* engine ,
22
22
const std::vector<int >& filters,
23
23
const std::vector<int >& strides,
24
24
const std::vector<int >& paddings,
25
25
std::string input_name) {
26
- if (engine_ ->itensor_quote_num [input_name] > 0 ) {
26
+ if (engine ->itensor_quote_num [input_name] > 0 ) {
27
27
return true ;
28
28
}
29
29
if (filters[0 ] == 1 && filters[1 ] == 1 && strides[0 ] == 1 &&
30
30
strides[1 ] == 1 && paddings[0 ] == 0 && paddings[1 ] == 0 )
31
- engine_ ->itensor_quote_num [input_name] += 1 ;
31
+ engine ->itensor_quote_num [input_name] += 1 ;
32
32
33
33
return false ;
34
34
}
35
35
36
+ template <typename RegistFunc, typename SetDilationFunc>
37
+ void ConvertConv2d (TensorRTEngine* engine, const framework::proto::OpDesc& op,
38
+ const framework::Scope& scope, bool test_mode,
39
+ RegistFunc fadd_layer, SetDilationFunc fset_dilation,
40
+ const std::string& name) {
41
+ VLOG (3 ) << " convert a fluid " << name << " op to tensorrt layer without bias" ;
42
+
43
+ framework::OpDesc op_desc (op, nullptr );
44
+ PADDLE_ENFORCE_EQ (op_desc.Input (" Input" ).size (), 1 );
45
+ PADDLE_ENFORCE_EQ (op_desc.Input (" Filter" ).size (), 1 ); // Y is a weight
46
+ PADDLE_ENFORCE_EQ (op_desc.Output (" Output" ).size (), 1 );
47
+
48
+ PADDLE_ENFORCE (engine != nullptr );
49
+ auto * X = engine->GetITensor (op_desc.Input (" Input" ).front ());
50
+
51
+ // Declare weights
52
+ auto * Y_v = scope.FindVar (op_desc.Input (" Filter" ).front ());
53
+ PADDLE_ENFORCE_NOT_NULL (Y_v);
54
+ auto * Y_t = Y_v->GetMutable <framework::LoDTensor>();
55
+
56
+ platform::CPUPlace cpu_place;
57
+ std::unique_ptr<framework::LoDTensor> weight_tensor (
58
+ new framework::LoDTensor ());
59
+ weight_tensor->Resize (Y_t->dims ());
60
+ TensorCopySync ((*Y_t), cpu_place, weight_tensor.get ());
61
+
62
+ auto * weight_data = weight_tensor->mutable_data <float >(platform::CPUPlace ());
63
+
64
+ PADDLE_ENFORCE_EQ (weight_tensor->dims ().size (), 4UL );
65
+ const int n_output = weight_tensor->dims ()[0 ];
66
+ const int n_input = weight_tensor->dims ()[1 ];
67
+ const int filter_h = weight_tensor->dims ()[2 ];
68
+ const int filter_w = weight_tensor->dims ()[3 ];
69
+ const int groups = boost::get<int >(op_desc.GetAttr (" groups" ));
70
+ const std::vector<int > dilations =
71
+ boost::get<std::vector<int >>(op_desc.GetAttr (" dilations" ));
72
+ const std::vector<int > strides =
73
+ boost::get<std::vector<int >>(op_desc.GetAttr (" strides" ));
74
+ const std::vector<int > paddings =
75
+ boost::get<std::vector<int >>(op_desc.GetAttr (" paddings" ));
76
+
77
+ nvinfer1::DimsHW nv_ksize (filter_h, filter_w);
78
+ nvinfer1::DimsHW nv_dilations (dilations[0 ], dilations[1 ]);
79
+ nvinfer1::DimsHW nv_strides (strides[0 ], strides[1 ]);
80
+ nvinfer1::DimsHW nv_paddings (paddings[0 ], paddings[1 ]);
81
+
82
+ TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT ,
83
+ static_cast <void *>(weight_data),
84
+ static_cast <size_t >(weight_tensor->numel ())};
85
+
86
+ TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT , nullptr , 0 };
87
+ auto * layer = fadd_layer (const_cast <nvinfer1::ITensor*>(X), n_output, n_input,
88
+ nv_ksize, weight, bias);
89
+ PADDLE_ENFORCE (layer != nullptr );
90
+ layer->setStride (nv_strides);
91
+ layer->setPadding (nv_paddings);
92
+ layer->setNbGroups (groups);
93
+ // set dilations
94
+ fset_dilation (layer, nv_dilations);
95
+
96
+ auto output_name = op_desc.Output (" Output" ).front ();
97
+ layer->setName ((name + " (Output: " + output_name + " )" ).c_str ());
98
+ engine->weight_map [op_desc.Input (" Filter" ).front ()] =
99
+ std::move (weight_tensor);
100
+ layer->getOutput (0 )->setName (output_name.c_str ());
101
+ engine->SetITensor (output_name, layer->getOutput (0 ));
102
+
103
+ if (test_mode ||
104
+ to_skip_merging_optimize (engine, {filter_h, filter_w}, strides, paddings,
105
+ op_desc.Input (" Input" ).front ())) {
106
+ engine->DeclareOutput (output_name);
107
+ }
108
+ }
109
+
36
110
class Conv2dOpConverter : public OpConverter {
37
111
public:
38
112
void operator ()(const framework::proto::OpDesc& op,
39
113
const framework::Scope& scope, bool test_mode) override {
40
- VLOG (3 ) << " convert a fluid conv2d op to tensorrt conv layer without bias" ;
41
-
42
- framework::OpDesc op_desc (op, nullptr );
43
- PADDLE_ENFORCE_EQ (op_desc.Input (" Input" ).size (), 1 );
44
- PADDLE_ENFORCE_EQ (op_desc.Input (" Filter" ).size (), 1 ); // Y is a weight
45
- PADDLE_ENFORCE_EQ (op_desc.Output (" Output" ).size (), 1 );
46
-
47
- auto * X = engine_->GetITensor (op_desc.Input (" Input" ).front ());
48
-
49
- // Declare weights
50
- auto * Y_v = scope.FindVar (op_desc.Input (" Filter" ).front ());
51
- PADDLE_ENFORCE_NOT_NULL (Y_v);
52
- auto * Y_t = Y_v->GetMutable <framework::LoDTensor>();
53
-
54
- platform::CPUPlace cpu_place;
55
- std::unique_ptr<framework::LoDTensor> weight_tensor (
56
- new framework::LoDTensor ());
57
- weight_tensor->Resize (Y_t->dims ());
58
- TensorCopySync ((*Y_t), cpu_place, weight_tensor.get ());
59
-
60
- auto * weight_data =
61
- weight_tensor->mutable_data <float >(platform::CPUPlace ());
62
-
63
- PADDLE_ENFORCE_EQ (weight_tensor->dims ().size (), 4UL );
64
- const int n_output = weight_tensor->dims ()[0 ];
65
- const int filter_h = weight_tensor->dims ()[2 ];
66
- const int filter_w = weight_tensor->dims ()[3 ];
67
-
68
- const int groups = boost::get<int >(op_desc.GetAttr (" groups" ));
69
- const std::vector<int > dilations =
70
- boost::get<std::vector<int >>(op_desc.GetAttr (" dilations" ));
71
- const std::vector<int > strides =
72
- boost::get<std::vector<int >>(op_desc.GetAttr (" strides" ));
73
- const std::vector<int > paddings =
74
- boost::get<std::vector<int >>(op_desc.GetAttr (" paddings" ));
75
-
76
- nvinfer1::DimsHW nv_ksize (filter_h, filter_w);
77
- nvinfer1::DimsHW nv_dilations (dilations[0 ], dilations[1 ]);
78
- nvinfer1::DimsHW nv_strides (strides[0 ], strides[1 ]);
79
- nvinfer1::DimsHW nv_paddings (paddings[0 ], paddings[1 ]);
80
-
81
- TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT ,
82
- static_cast <void *>(weight_data),
83
- weight_tensor->memory_size () / sizeof (float )};
84
-
85
- TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT , nullptr , 0 };
86
- auto * layer = TRT_ENGINE_ADD_LAYER (
87
- engine_, Convolution, *const_cast <nvinfer1::ITensor*>(X), n_output,
88
- nv_ksize, weight.get (), bias.get ());
89
- PADDLE_ENFORCE (layer != nullptr );
90
- layer->setStride (nv_strides);
91
- layer->setPadding (nv_paddings);
92
- layer->setDilation (nv_dilations);
93
- layer->setNbGroups (groups);
94
-
95
- auto output_name = op_desc.Output (" Output" ).front ();
96
- layer->setName ((" conv2d (Output: " + output_name + " )" ).c_str ());
97
- engine_->weight_map [op_desc.Input (" Filter" ).front ()] =
98
- std::move (weight_tensor);
99
- layer->getOutput (0 )->setName (output_name.c_str ());
100
- engine_->SetITensor (output_name, layer->getOutput (0 ));
101
-
102
- if (test_mode ||
103
- to_skip_merging_optimize (engine_, {filter_h, filter_w}, strides,
104
- paddings, op_desc.Input (" Input" ).front ())) {
105
- engine_->DeclareOutput (output_name);
106
- }
114
+ ConvertConv2d (
115
+ engine_, op, scope, test_mode,
116
+ [&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */
117
+ int n_input, /* Conv input maps */
118
+ nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
119
+ TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* {
120
+ auto * layer =
121
+ TRT_ENGINE_ADD_LAYER (engine_, Convolution, *inputs, n_output,
122
+ ksize, weight.get (), bias.get ());
123
+ return layer;
124
+ },
125
+ [](nvinfer1::IConvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
126
+ layer->setDilation (dilations);
127
+ },
128
+ " conv2d" );
129
+ }
130
+ };
131
+
132
+ class Deconv2dOpConverter : public OpConverter {
133
+ public:
134
+ void operator ()(const framework::proto::OpDesc& op,
135
+ const framework::Scope& scope, bool test_mode) override {
136
+ ConvertConv2d (
137
+ engine_, op, scope, test_mode,
138
+ [&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */
139
+ int n_input, /* Deconv output maps */
140
+ nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
141
+ TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
142
+ auto * layer =
143
+ TRT_ENGINE_ADD_LAYER (engine_, Deconvolution, *inputs, n_input,
144
+ ksize, weight.get (), bias.get ());
145
+ return layer;
146
+ },
147
+ [](nvinfer1::IDeconvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
148
+ PADDLE_ENFORCE (
149
+ dilations.d [0 ] == 1 && dilations.d [1 ] == 1 ,
150
+ " Dilations must be (1, 1) for tensorRT, but given (%d, %d)" ,
151
+ dilations.d [0 ], dilations.d [1 ]);
152
+ },
153
+ " conv2d_transpose" );
107
154
}
108
155
};
109
156
@@ -112,3 +159,4 @@ class Conv2dOpConverter : public OpConverter {
112
159
} // namespace paddle
113
160
114
161
// Register both converters with the TensorRT op-converter registry.
REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
REGISTER_TRT_OP_CONVERTER(conv2d_transpose, Deconv2dOpConverter);