Skip to content

Commit 8567d04

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_refine_bilinear_interp
2 parents 5d33481 + 25241e9 commit 8567d04

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1635
-346
lines changed

benchmark/fluid/fluid_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
9797
return train_program, fluid.default_startup_program()
9898
else:
9999
raise ValueError(
100-
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
100+
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
101101
)
102102

103103

benchmark/fluid/kube_gen_job.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,10 @@ def gen_job():
108108
tn_container["ports"][0]["containerPort"] = spreadport
109109

110110
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
111-
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
111+
envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
112112
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
113113
envs.append({"name": "ENTRY", "value": args.entry})
114-
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
114+
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
115115
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
116116
# NOTE: these directories below are cluster specific, please modify
117117
# this settings before you run on your own cluster.
@@ -167,16 +167,22 @@ def gen_job():
167167
tn_container["volumeMounts"] = volumeMounts
168168

169169
ps_container["env"] = envs
170-
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
170+
ps_container["env"].append({
171+
"name": "PADDLE_TRAINING_ROLE",
172+
"value": "PSERVER"
173+
})
171174
tn_container["env"] = envs
172175
if args.disttype == "pserver":
173176
tn_container["env"].append({
174-
"name": "TRAINING_ROLE",
177+
"name": "PADDLE_TRAINING_ROLE",
175178
"value": "TRAINER"
176179
})
177180
elif args.disttype == "nccl2" or args.disttype == "local":
178181
# NCCL2 have no training role, set to plain WORKER
179-
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
182+
tn_container["env"].append({
183+
"name": "PADDLE_TRAINING_ROLE",
184+
"value": "WORKER"
185+
})
180186

181187
os.mkdir(args.jobname)
182188
if args.disttype == "pserver":

cmake/external/mkldnn.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
4545
ELSE()
4646
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
4747
ENDIF()
48-
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
48+
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result")
49+
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
4950
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
5051
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
5152
ExternalProject_Add(

doc/fluid/howto/cluster/fluid_cluster_train_cn.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
168168

169169
第二步,启动Parameter Server:
170170
```bash
171-
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
171+
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
172172
```
173173
执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Parameter Server已经正常启动。
174174

175175
第三步,启动Trainer:
176176
```bash
177-
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
177+
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
178178
```
179179
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
180180

doc/fluid/howto/cluster/fluid_recordio.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
114114
ret_list.append(f)
115115
return ret_list
116116

117-
trainers = int(os.getenv("TRAINERS"))
118-
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
117+
trainers = int(os.getenv("PADDLE_TRAINERS"))
118+
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
119119
data_file = fluid.layers.io.open_files(
120120
filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
121121
thread_num=1,

paddle/fluid/operators/activation_op.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ Tanh Activation Operator.
143143
__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
144144
TanhShrink Activation Operator.
145145
146-
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
146+
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
147147
148148
)DOC";
149149

@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
385385
AddComment(R"DOC(
386386
STanh Activation Operator.
387387
388-
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
388+
$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
389389
390390
)DOC");
391391
}

paddle/fluid/operators/detection_map_op.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
175175
AddComment(R"DOC(
176176
Detection mAP evaluate operator.
177177
The general steps are as follows. First, calculate the true positive and
178-
false positive according to the input of detection and labels, then
179-
calculate the mAP evaluate value.
180-
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
181-
from the following articles:
182-
https://sanchom.wordpress.com/tag/average-precision/
183-
https://arxiv.org/abs/1512.02325
178+
false positive according to the input of detection and labels, then
179+
calculate the mAP evaluate value.
180+
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
181+
from the following articles:
182+
https://sanchom.wordpress.com/tag/average-precision/
183+
https://arxiv.org/abs/1512.02325
184184
185185
)DOC");
186186
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include <string>
16+
#include "paddle/fluid/operators/mean_op.h"
17+
18+
namespace paddle {
19+
namespace operators {
20+
21+
using framework::DataLayout;
22+
template <typename T>
23+
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
24+
public:
25+
void Compute(const framework::ExecutionContext& context) const override {
26+
float mean = context.Attr<float>("mean");
27+
float std = context.Attr<float>("std");
28+
auto* tensor = context.Output<framework::Tensor>("Out");
29+
T* data = tensor->mutable_data<T>(context.GetPlace());
30+
31+
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
32+
std::minstd_rand engine;
33+
if (seed == 0) {
34+
seed = std::random_device()();
35+
}
36+
engine.seed(seed);
37+
std::normal_distribution<T> dist(mean, std);
38+
int64_t size = tensor->numel();
39+
for (int64_t i = 0; i < size; ++i) {
40+
data[i] = dist(engine);
41+
}
42+
43+
// The format of output is set as the mkldnn's format
44+
// TODO(@mozga-intel) The format of matrix sets inside the another layers.
45+
tensor->set_layout(DataLayout::kMKLDNN);
46+
tensor->set_format(mkldnn::memory::format::oihw);
47+
}
48+
};
49+
} // namespace operators
50+
} // namespace paddle
51+
52+
namespace ops = paddle::operators;
53+
54+
REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace,
55+
ops::GaussianMKLDNNKernel<float>);

paddle/fluid/operators/gaussian_random_op.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ limitations under the License. */
1515
#include <random>
1616
#include "paddle/fluid/framework/op_registry.h"
1717

18+
#ifdef PADDLE_WITH_MKLDNN
19+
#include "paddle/fluid/platform/mkldnn_helper.h"
20+
#endif
21+
1822
namespace paddle {
1923
namespace operators {
2024

@@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
6266
protected:
6367
framework::OpKernelType GetExpectedKernelType(
6468
const framework::ExecutionContext& ctx) const override {
69+
framework::LibraryType library{framework::LibraryType::kPlain};
70+
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
71+
72+
#ifdef PADDLE_WITH_MKLDNN
73+
if (library == framework::LibraryType::kPlain &&
74+
platform::CanMKLDNNBeUsed(ctx)) {
75+
library = framework::LibraryType::kMKLDNN;
76+
layout = framework::DataLayout::kMKLDNN;
77+
}
78+
#endif
79+
6580
return framework::OpKernelType(
6681
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
67-
ctx.device_context());
82+
ctx.device_context(), layout, library);
6883
}
6984
};
7085

@@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
95110
"(int, default 5(FP32)) "
96111
"Output data type.")
97112
.SetDefault(framework::proto::VarType::FP32);
98-
113+
AddAttr<bool>("use_mkldnn",
114+
"(bool, default false) Only used in mkldnn kernel")
115+
.SetDefault(false);
99116
AddComment(R"DOC(
100117
GaussianRandom Operator.
101118

paddle/fluid/operators/math/concat.cu

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,24 @@ namespace paddle {
2222
namespace operators {
2323
namespace math {
2424

25-
template <typename T>
26-
__device__ T upper_bound(const T* first, T count, T val) {
27-
const T* orig = first;
28-
const T* it = nullptr;
29-
T step = 0;
30-
while (count > 0) {
31-
it = first;
32-
step = count / 2;
33-
it += step;
34-
if (!(val < *it)) {
35-
first = ++it;
36-
count -= step + 1;
37-
} else {
38-
count = step;
39-
}
40-
}
41-
return first - orig;
42-
}
43-
4425
template <typename T>
4526
__global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
4627
const int output_rows, const int output_cols,
4728
T* output) {
4829
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
49-
int segment = upper_bound<int>(input_cols, col_size, tid_x) - 1;
50-
51-
int curr_offset = input_cols[segment];
52-
int curr_segment = segment;
30+
int curr_segment = 0;
31+
int curr_offset = input_cols[0];
5332
for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) {
54-
T curr_col_offset;
55-
while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) {
33+
int curr_col_offset = input_cols[curr_segment + 1];
34+
while (curr_col_offset <= tid_x) {
5635
curr_offset = curr_col_offset;
5736
++curr_segment;
37+
curr_col_offset = input_cols[curr_segment + 1];
5838
}
5939

6040
int local_col = tid_x - curr_offset;
6141
int segment_width = curr_col_offset - curr_offset;
42+
6243
T* input_ptr = inputs[curr_segment];
6344
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
6445
for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y)
@@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
8970
const int in_col, const int* out_cols,
9071
int out_cols_size, T** outputs_data) {
9172
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
92-
int segment = upper_bound<int>(out_cols, out_cols_size, tid_x) - 1;
93-
int curr_offset = out_cols[segment];
94-
int curr_segment = segment;
73+
int curr_segment = 0;
74+
int curr_offset = out_cols[0];
9575
for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) {
96-
T curr_col_offset;
97-
while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) {
76+
int curr_col_offset = out_cols[curr_segment + 1];
77+
while (curr_col_offset <= tid_x) {
9878
curr_offset = curr_col_offset;
9979
++curr_segment;
80+
curr_col_offset = out_cols[curr_segment + 1];
10081
}
10182

10283
int local_col = tid_x - curr_offset;
@@ -228,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
228209

229210
outputs_cols[0] = 0;
230211
for (int i = 0; i < o_num; ++i) {
231-
int t_col = outputs->at(i)->numel() / out_row;
212+
int t_col = ref_inputs.at(i)->numel() / out_row;
232213
if (sameShape) {
233214
if (t_col != out0_col) sameShape = false;
234215
}

0 commit comments

Comments
 (0)