Skip to content

Commit b28b2f1

Browse files
QiJune authored and reyoung committed
refine test_recognize_digits_mlp and format codes (#5937)
1 parent d89ff5b commit b28b2f1

File tree

17 files changed

+231
-273
lines changed

17 files changed

+231
-273
lines changed

paddle/capi/Matrix.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
5555
}
5656

5757
PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
58-
paddle_real* value) {
58+
paddle_real* value) {
5959
if (mat == nullptr || value == nullptr) return kPD_NULLPTR;
6060
auto ptr = cast(mat);
6161
if (ptr->mat == nullptr) return kPD_NULLPTR;
@@ -75,7 +75,7 @@ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
7575
}
7676

7777
PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat,
78-
paddle_real* result) {
78+
paddle_real* result) {
7979
if (mat == nullptr || result == nullptr) return kPD_NULLPTR;
8080
auto ptr = cast(mat);
8181
if (ptr->mat == nullptr) return kPD_NULLPTR;

paddle/capi/matrix.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ PD_API paddle_error paddle_matrix_set_row(paddle_matrix mat,
7979
* @note value should contain enough element of data to init the mat
8080
*/
8181
PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat,
82-
paddle_real* value);
82+
paddle_real* value);
8383

8484
/**
8585
* @brief PDMatGetRow Get raw row buffer from matrix
@@ -93,14 +93,14 @@ PD_API paddle_error paddle_matrix_get_row(paddle_matrix mat,
9393
paddle_real** rawRowBuffer);
9494

9595
/**
96-
* @brief copy data from the matrix
96+
* @brief copy data from the matrix
9797
* @param [in] mat Target matrix
98-
* @param [out] result pointer to store the matrix data
98+
* @param [out] result pointer to store the matrix data
9999
* @return paddle_error
100100
* @note the space of the result should allocated before invoke this API
101101
*/
102102
PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat,
103-
paddle_real* result);
103+
paddle_real* result);
104104
/**
105105
* @brief PDMatCreateNone Create None Matrix
106106
* @return

paddle/framework/tensor_util.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,18 +135,17 @@ inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx,
135135
auto dst_ptr = static_cast<void*>(dst->data());
136136

137137
if (platform::is_cpu_place(src.place())) {
138-
memory::Copy(dst_place, dst_ptr, boost::get<platform::CPUPlace>(src.place()),
139-
src_ptr, size);
138+
memory::Copy(dst_place, dst_ptr,
139+
boost::get<platform::CPUPlace>(src.place()), src_ptr, size);
140140
}
141141
#ifdef PADDLE_WITH_CUDA
142142
else if (platform::is_gpu_place(src.place())) { // NOLINT
143143
memory::Copy(
144-
dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()), src_ptr,
145-
size,
144+
dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()),
145+
src_ptr, size,
146146
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
147147
}
148148
#endif
149-
150149
}
151150

152151
} // namespace framework

paddle/operators/math/maxouting.cc

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ template <typename T>
2323
class MaxOutFunctor<platform::CPUPlace, T> {
2424
public:
2525
void operator()(const platform::DeviceContext& context,
26-
const framework::Tensor& input,
27-
framework::Tensor * output,
26+
const framework::Tensor& input, framework::Tensor* output,
2827
int groups) {
2928
const int batch_size = input.dims()[0];
3029
const int input_height = input.dims()[2];
@@ -37,34 +36,30 @@ class MaxOutFunctor<platform::CPUPlace, T> {
3736
T* output_data = output->mutable_data<T>(context.GetPlace());
3837

3938
for (int i = 0; i < batch_size; ++i) {
40-
int new_bindex = c_size * i;
39+
int new_bindex = c_size * i;
4140
for (int c = 0; c < output_channels; ++c) {
4241
int new_cindex = fea_size * c;
4342
for (int f = 0; f < fea_size; ++f) {
4443
T ele = static_cast<T>(-FLT_MAX);
4544
for (int ph = 0; ph < groups; ++ph) {
46-
T x = input_data[(new_bindex + new_cindex) * groups
47-
+ ph * fea_size + f];
45+
T x = input_data[(new_bindex + new_cindex) * groups +
46+
ph * fea_size + f];
4847
ele = ele > x ? ele : x;
4948
}
50-
output_data[(new_bindex+new_cindex+f)] = ele;
49+
output_data[(new_bindex + new_cindex + f)] = ele;
5150
}
5251
}
5352
}
5453
}
5554
};
5655

57-
58-
5956
template <class T>
6057
class MaxOutGradFunctor<platform::CPUPlace, T> {
61-
public:
58+
public:
6259
void operator()(const platform::DeviceContext& context,
63-
const framework::Tensor& input,
64-
framework::Tensor * input_grad,
60+
const framework::Tensor& input, framework::Tensor* input_grad,
6561
const framework::Tensor& output,
66-
const framework::Tensor& output_grad,
67-
int groups) {
62+
const framework::Tensor& output_grad, int groups) {
6863
const int batch_size = input.dims()[0];
6964
const int input_height = input.dims()[2];
7065
const int input_width = input.dims()[3];
@@ -84,11 +79,11 @@ class MaxOutGradFunctor<platform::CPUPlace, T> {
8479
bool continue_match = true;
8580
int output_idx = blen + clen + f;
8681
for (int g = 0; g < groups && continue_match; ++g) {
87-
int input_idx = input_idx0 + fea_size * g;
88-
if (input_data[input_idx] == output_data[output_idx]) {
89-
input_grad_data[input_idx] += output_grad_data[output_idx];
90-
continue_match = false;
91-
}
82+
int input_idx = input_idx0 + fea_size * g;
83+
if (input_data[input_idx] == output_data[output_idx]) {
84+
input_grad_data[input_idx] += output_grad_data[output_idx];
85+
continue_match = false;
86+
}
9287
}
9388
}
9489
}

paddle/operators/math/maxouting.cu

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ namespace math {
2121

2222
template <typename T>
2323
__global__ void KernelMaxOut(const int nthreads, const T* input_data,
24-
const int channels,
25-
const int input_height, const int input_width,
26-
int groups, T* output_data ) {
24+
const int channels, const int input_height,
25+
const int input_width, int groups,
26+
T* output_data) {
2727
const int size = input_height * input_width * channels / groups;
2828
const int feat_len = input_height * input_width;
2929
int index = blockIdx.x * blockDim.x + threadIdx.x;
@@ -34,7 +34,7 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
3434
int channel_idx = batch_offset / feat_len;
3535
int feat_idx = batch_offset % feat_len;
3636
int data_idx =
37-
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
37+
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
3838
T ele = static_cast<T>(-FLT_MAX);
3939
for (int g = 0; g < groups; ++g) {
4040
T x = input_data[data_idx + g * feat_len];
@@ -44,34 +44,35 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
4444
}
4545
}
4646
template <typename T>
47-
__global__ void KernelMaxoutGrad(
48-
const int nthreads, const T* input_data, const T* output_data,
49-
const T* output_grad, T* input_grad, const int channels,
50-
const int input_height, const int input_width, int groups) {
51-
const int size = input_height * input_width * channels / groups;
52-
const int feat_len = input_height * input_width;
53-
int index = blockIdx.x * blockDim.x + threadIdx.x;
54-
int offset = blockDim.x * gridDim.x;
55-
for (int i = index; i < nthreads; i += offset) {
56-
int batch_idx = i / size;
57-
int batch_offset = i % size;
58-
int channel_idx = batch_offset / feat_len;
59-
int feat_idx = batch_offset % feat_len;
60-
int data_idx =
47+
__global__ void KernelMaxoutGrad(const int nthreads, const T* input_data,
48+
const T* output_data, const T* output_grad,
49+
T* input_grad, const int channels,
50+
const int input_height, const int input_width,
51+
int groups) {
52+
const int size = input_height * input_width * channels / groups;
53+
const int feat_len = input_height * input_width;
54+
int index = blockIdx.x * blockDim.x + threadIdx.x;
55+
int offset = blockDim.x * gridDim.x;
56+
for (int i = index; i < nthreads; i += offset) {
57+
int batch_idx = i / size;
58+
int batch_offset = i % size;
59+
int channel_idx = batch_offset / feat_len;
60+
int feat_idx = batch_offset % feat_len;
61+
int data_idx =
6162
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
62-
int max_index = -1;
63-
bool continue_match = true;
64-
for (int g = 0; g < groups && continue_match; ++g) {
65-
if (input_data[data_idx + g * feat_len] == output_data[i]) {
66-
max_index = data_idx + g * feat_len;
67-
continue_match = false;
68-
break;
69-
}
70-
}
71-
if (max_index != -1) {
72-
input_grad[max_index] += output_grad[index];
63+
int max_index = -1;
64+
bool continue_match = true;
65+
for (int g = 0; g < groups && continue_match; ++g) {
66+
if (input_data[data_idx + g * feat_len] == output_data[i]) {
67+
max_index = data_idx + g * feat_len;
68+
continue_match = false;
69+
break;
7370
}
7471
}
72+
if (max_index != -1) {
73+
input_grad[max_index] += output_grad[index];
74+
}
75+
}
7576
}
7677
/*
7778
* All tensors are in NCHW format.
@@ -80,7 +81,7 @@ template <typename T>
8081
class MaxOutFunctor<platform::GPUPlace, T> {
8182
public:
8283
void operator()(const platform::DeviceContext& context,
83-
const framework::Tensor& input, framework::Tensor * output,
84+
const framework::Tensor& input, framework::Tensor* output,
8485
int groups) {
8586
const int batch_size = input.dims()[0];
8687
const int input_channels = input.dims()[1];
@@ -92,7 +93,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {
9293

9394
const T* input_data = input.data<T>();
9495
T* output_data = output->mutable_data<T>(context.GetPlace());
95-
int nthreads = output->numel();
96+
int nthreads = output->numel();
9697
int blocks = (nthreads + 1024 - 1) / 1024;
9798
dim3 threads(1024, 1);
9899
dim3 grid(blocks, 1);
@@ -101,8 +102,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {
101102
T><<<grid, threads, 0,
102103
reinterpret_cast<const platform::CUDADeviceContext&>(context)
103104
.stream()>>>(nthreads, input_data, input_channels,
104-
input_height, input_width, groups,
105-
output_data);
105+
input_height, input_width, groups, output_data);
106106
}
107107
};
108108
/*
@@ -112,11 +112,9 @@ template <typename T>
112112
class MaxOutGradFunctor<platform::GPUPlace, T> {
113113
public:
114114
void operator()(const platform::DeviceContext& context,
115-
const framework::Tensor& input,
116-
framework::Tensor * input_grad,
115+
const framework::Tensor& input, framework::Tensor* input_grad,
117116
const framework::Tensor& output,
118-
const framework::Tensor& output_grad,
119-
int groups) {
117+
const framework::Tensor& output_grad, int groups) {
120118
const int batch_size = input.dims()[0];
121119
const int input_channels = input.dims()[1];
122120
const int input_height = input.dims()[2];
@@ -129,17 +127,17 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
129127
const T* output_data = output.data<T>();
130128
const T* output_grad_data = output_grad.data<T>();
131129
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
132-
int nthreads = output.numel();
130+
int nthreads = output.numel();
133131
int blocks = (nthreads + 1024 - 1) / 1024;
134132
dim3 threads(1024, 1);
135133
dim3 grid(blocks, 1);
136134

137135
KernelMaxoutGrad<
138136
T><<<grid, threads, 0,
139137
reinterpret_cast<const platform::CUDADeviceContext&>(context)
140-
.stream()>>>(
141-
nthreads, input_data, output_data, output_grad_data, input_grad_data,
142-
input_channels, input_height, input_width, groups);
138+
.stream()>>>(nthreads, input_data, output_data,
139+
output_grad_data, input_grad_data, input_channels,
140+
input_height, input_width, groups);
143141
}
144142
};
145143

paddle/operators/math/maxouting.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,22 @@ namespace paddle {
2121
namespace operators {
2222
namespace math {
2323

24-
#define FLT_MAX \
25-
__FLT_MAX__
24+
#define FLT_MAX __FLT_MAX__
2625

2726
template <typename Place, typename T>
2827

2928
class MaxOutFunctor {
3029
public:
3130
void operator()(const platform::DeviceContext& context,
32-
const framework::Tensor& input, framework::Tensor * output,
31+
const framework::Tensor& input, framework::Tensor* output,
3332
int groups);
3433
};
3534

3635
template <typename Place, class T>
3736
class MaxOutGradFunctor {
3837
public:
3938
void operator()(const platform::DeviceContext& context,
40-
const framework::Tensor& input,
41-
framework::Tensor * input_grad,
39+
const framework::Tensor& input, framework::Tensor* input_grad,
4240
const framework::Tensor& output,
4341
const framework::Tensor& output_grad, int groups);
4442
};

paddle/operators/maxout_op.cc

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,17 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
2222
public:
2323
MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
2424
: OpProtoAndCheckerMaker(proto, op_checker) {
25-
AddInput("X",
25+
AddInput(
26+
"X",
2627
"(Tensor) The input tensor of maxout operator. "
2728
"The format of input tensor is NCHW. Where N is batch size, C is the "
2829
"number of channels, H and W is the height and width of feature.");
2930
AddOutput("Out",
30-
"(Tensor) The output tensor of maxout operator."
31-
"The format of output tensor is also NCHW."
32-
"Where N is batch size, C is "
33-
"the number of channels, H and W is the height and "
34-
"width of feature.");
31+
"(Tensor) The output tensor of maxout operator."
32+
"The format of output tensor is also NCHW."
33+
"Where N is batch size, C is "
34+
"the number of channels, H and W is the height and "
35+
"width of feature.");
3536
AddAttr<int>(
3637
"groups",
3738
R"DOC("Specifies how many groups the input tensor will be split"
@@ -59,21 +60,19 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
5960
}
6061
};
6162

62-
6363
class MaxOutOp : public framework::OperatorWithKernel {
6464
public:
6565
using framework::OperatorWithKernel::OperatorWithKernel;
6666
void InferShape(framework::InferShapeContext* ctx) const override {
67-
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp"
67+
PADDLE_ENFORCE(ctx->HasInput("X"),
68+
"Input(X) of MaxoutOp"
6869
"should not be null.");
6970
PADDLE_ENFORCE(ctx->HasOutput("Out"),
7071
"Output(Out) of MaxoutOp should not be null.");
7172
auto in_x_dims = ctx->GetInputDim("X");
7273
int groups = ctx->Attrs().Get<int>("groups");
7374
// check groups > 1
74-
PADDLE_ENFORCE_GT(
75-
groups, 1,
76-
"groups should be larger than 1 in maxoutop");
75+
PADDLE_ENFORCE_GT(groups, 1, "groups should be larger than 1 in maxoutop");
7776
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1] / groups});
7877
output_shape.push_back(in_x_dims[2]);
7978
output_shape.push_back(in_x_dims[3]);
@@ -87,18 +86,17 @@ class MaxOutOpGrad : public framework::OperatorWithKernel {
8786
void InferShape(framework::InferShapeContext* ctx) const override {
8887
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
8988
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
90-
"Input(X@GRAD) should not be null.");
89+
"Input(X@GRAD) should not be null.");
9190
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
9291
}
9392
};
94-
} // namespace operators
95-
} // namespace paddle
93+
} // namespace operators
94+
} // namespace paddle
9695

9796
namespace ops = paddle::operators;
9897
REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad,
99-
ops::MaxOutOpGrad);
100-
REGISTER_OP_CPU_KERNEL(maxout, ops::MaxOutKernel<paddle::platform::CPUPlace,
101-
float>);
102-
REGISTER_OP_CPU_KERNEL(maxout_grad,
103-
ops::MaxOutGradKernel<paddle::platform::CPUPlace,
104-
float>);
98+
ops::MaxOutOpGrad);
99+
REGISTER_OP_CPU_KERNEL(maxout,
100+
ops::MaxOutKernel<paddle::platform::CPUPlace, float>);
101+
REGISTER_OP_CPU_KERNEL(
102+
maxout_grad, ops::MaxOutGradKernel<paddle::platform::CPUPlace, float>);

paddle/operators/maxout_op.cu.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ namespace ops = paddle::operators;
1818
REGISTER_OP_GPU_KERNEL(maxout,
1919
ops::MaxOutKernel<paddle::platform::GPUPlace, float>,
2020
ops::MaxOutKernel<paddle::platform::GPUPlace, double>);
21-
REGISTER_OP_GPU_KERNEL(maxout_grad,
22-
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
23-
float>,
24-
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
25-
double>);
21+
REGISTER_OP_GPU_KERNEL(
22+
maxout_grad, ops::MaxOutGradKernel<paddle::platform::GPUPlace, float>,
23+
ops::MaxOutGradKernel<paddle::platform::GPUPlace, double>);

0 commit comments

Comments
 (0)