Skip to content

Commit abe2092

Browse files
authored
Exhaustive search for cuDNN conv. (#14286)
* Exhaustive search for cuDNN conv.
* Refine code and add unit testing.
* Fix model load in fluid/inference and unit testing in conv2d.
* Follow comments.
* Fix compiling. test=develop
1 parent f215534 commit abe2092

File tree

17 files changed

+384
-79
lines changed

17 files changed

+384
-79
lines changed

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <algorithm>
1516
#include <array>
1617
#include <string>
1718
#include <vector>

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
// limitations under the License.
1414

1515
#pragma once
16+
#include <algorithm>
17+
#include <map>
1618
#include <string>
1719
#include <vector>
1820
#include "paddle/fluid/framework/naive_executor.h"

paddle/fluid/inference/api/api.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "paddle/fluid/framework/scope.h"
1717
#include "paddle/fluid/inference/api/paddle_inference_api.h"
1818
#include "paddle/fluid/platform/enforce.h"
19-
#include "paddle_inference_api.h"
2019

2120
namespace paddle {
2221

paddle/fluid/inference/api/helper.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,14 @@
1616

1717
#include <glog/logging.h>
1818
#include <sys/time.h>
19+
#include <algorithm>
1920
#include <chrono> // NOLINT
2021
#include <numeric>
2122
#include <sstream>
2223
#include <string>
2324
#include <vector>
25+
#include "paddle/fluid/inference/api/paddle_inference_api.h"
2426
#include "paddle/fluid/string/printf.h"
25-
#include "paddle_inference_api.h"
2627

2728
namespace paddle {
2829
namespace inference {

paddle/fluid/inference/io.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ void ReadBinaryFile(const std::string& filename, std::string* contents) {
5959
bool IsPersistable(const framework::VarDesc* var) {
6060
if (var->Persistable() &&
6161
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
62-
var->GetType() != framework::proto::VarType::FETCH_LIST) {
62+
var->GetType() != framework::proto::VarType::FETCH_LIST &&
63+
var->GetType() != framework::proto::VarType::RAW) {
6364
return true;
6465
}
6566
return false;

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ class TensorRTEngine : public EngineBase {
134134
std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
135135
weight_map;
136136

137-
// TODO: (NHZLX)
137+
// TODO(NHZLX)
138138
// In the normal case, paddle-trt has a bug when running googlenet.
139139
// When there are more than two convolutions of 1 * 1 with the same input, the
140140
// paddle-tensorrt will do the merging optimization, which fuses those conv

paddle/fluid/operators/add_position_encoding_op.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ class AddPositionEncodingKernel : public framework::OpKernel<T> {
6666
x_lod.empty() ? max_seq_len : x_lod[0][i + 1] - x_lod[0][i];
6767
for (int j = 0; j < max_length; ++j) {
6868
for (int k = 0; k < half_size; ++k) {
69-
const double val = (half_size > 1)
70-
? j / pow(10000.0, double(k) / (half_size - 1))
71-
: j / 10000.0;
69+
const double val =
70+
(half_size > 1)
71+
? j / pow(10000.0, static_cast<double>(k) / (half_size - 1))
72+
: j / 10000.0;
7273
dst_ptr[k] = src_ptr[k] * alpha + sin(val) * beta;
7374
dst_ptr[half_size + k] =
7475
src_ptr[half_size + k] * alpha + cos(val) * beta;

0 commit comments

Comments
 (0)