Skip to content

Commit 00e8791

Browse files
committed
fix compile error on CPU. test=develop
1 parent d239cf2 commit 00e8791

File tree

3 files changed

+37
-38
lines changed

3 files changed

+37
-38
lines changed

paddle/fluid/operators/momentum_op.cc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,15 @@ class MomentumOp : public framework::OperatorWithKernel {
4545
"Output(VelocityOut) of Momentum should not be null.");
4646

4747
auto param_dim = ctx->GetInputDim("Param");
48-
PADDLE_ENFORCE_EQ(
49-
param_dim, ctx->GetInputDim("Grad"),
50-
"Param and Grad input of MomentumOp should have the same dimension.");
51-
PADDLE_ENFORCE_EQ(
52-
param_dim, ctx->GetInputDim("Velocity"),
53-
"Param and Velocity of MomentumOp should have the same dimension.");
48+
if (ctx->GetInputsVarType("Grad")[0] ==
49+
framework::proto::VarType::LOD_TENSOR) {
50+
PADDLE_ENFORCE_EQ(
51+
param_dim, ctx->GetInputDim("Grad"),
52+
"Param and Grad input of MomentumOp should have the same dimension.");
53+
PADDLE_ENFORCE_EQ(
54+
param_dim, ctx->GetInputDim("Velocity"),
55+
"Param and Velocity of MomentumOp should have the same dimension.");
56+
}
5457
PADDLE_ENFORCE_EQ(framework::product(ctx->GetInputDim("LearningRate")), 1,
5558
"Learning_rate should be a scalar");
5659

paddle/fluid/operators/momentum_op.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#pragma once
16+
#include <string>
1617
#include "paddle/fluid/framework/eigen.h"
1718
#include "paddle/fluid/framework/op_registry.h"
1819
#include "paddle/fluid/operators/math/algorithm.h"
@@ -303,38 +304,39 @@ class MomentumOpKernel : public framework::OpKernel<T> {
303304
auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
304305
.Var()
305306
->GetMutable<framework::SelectedRows>();
306-
307307
math::scatter::MergeAdd<DeviceContext, T> merge_func;
308308
merge_func(ctx.template device_context<DeviceContext>(), *grad,
309309
merged_grad);
310310

311-
platform::ForRange<DeviceContext> for_range(
312-
static_cast<const DeviceContext&>(ctx.device_context()),
313-
param->numel());
314-
315311
const int64_t* rows = nullptr;
312+
#ifdef PADDLE_WITH_CUDA
316313
if (platform::is_gpu_place(ctx.GetPlace())) {
317314
rows = merged_grad->rows().CUDAData(ctx.GetPlace());
318315
} else {
316+
#endif
319317
rows = merged_grad->rows().data();
318+
#ifdef PADDLE_WITH_CUDA
320319
}
321-
320+
#endif
321+
int64_t row_numel =
322+
merged_grad->value().numel() / merged_grad->rows().size();
323+
platform::ForRange<DeviceContext> for_range(
324+
static_cast<const DeviceContext&>(ctx.device_context()),
325+
param->numel());
322326
if (use_nesterov) {
323327
SparseMomentumFunctor<T, UseNesterov> functor(
324328
param->data<T>(), merged_grad->value().data<T>(),
325-
velocity->data<T>(), learning_rate->data<T>(), mu, rows,
329+
velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
326330
static_cast<int64_t>(merged_grad->rows().size()),
327-
static_cast<int64_t>(merged_grad->height()),
328331
param_out->mutable_data<T>(ctx.GetPlace()),
329332
velocity_out->mutable_data<T>(ctx.GetPlace()));
330333
for_range(functor);
331334

332335
} else {
333336
SparseMomentumFunctor<T, NoNesterov> functor(
334337
param->data<T>(), merged_grad->value().data<T>(),
335-
velocity->data<T>(), learning_rate->data<T>(), mu, rows,
338+
velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
336339
static_cast<int64_t>(merged_grad->rows().size()),
337-
static_cast<int64_t>(merged_grad->height()),
338340
param_out->mutable_data<T>(ctx.GetPlace()),
339341
velocity_out->mutable_data<T>(ctx.GetPlace()));
340342
for_range(functor);

python/paddle/fluid/tests/unittests/test_momentum_op.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -121,22 +121,13 @@ def check_with_place(self, place):
121121
grad_tensor = grad_selected_rows.get_tensor()
122122
grad_tensor.set(grad_np_array, place)
123123

124-
velocity_selected_rows = scope.var('Velocity').get_selected_rows()
125-
velocity_selected_rows.set_height(height)
126-
velocity_selected_rows.set_rows(rows)
127-
velocity_np_array = np.ones((len(rows), row_numel)).astype("float32")
128-
velocity_np_array[0, 0] = 2.0
129-
velocity_np_array[2, 8] = 2.0
130-
velocity_tensor = velocity_selected_rows.get_tensor()
131-
velocity_tensor.set(velocity_np_array, place)
132-
velocity_out_selected_rows = scope.var('VelocityOut').get_selected_rows(
133-
)
134-
velocity_out_selected_rows.set_height(height)
135-
velocity_out_selected_rows.set_rows(rows)
136-
velocity_out_np_array = np.full((len(rows), row_numel),
124+
velocity = scope.var('Velocity').get_tensor()
125+
velocity_np_array = np.ones((height, row_numel)).astype("float32")
126+
velocity.set(velocity_np_array, place)
127+
velocity_out = scope.var('VelocityOut').get_tensor()
128+
velocity_out_np_array = np.full((height, row_numel),
137129
0.0).astype("float32")
138-
velocity_out_tensor = velocity_out_selected_rows.get_tensor()
139-
velocity_out_tensor.set(velocity_out_np_array, place)
130+
velocity_out.set(velocity_out_np_array, place)
140131

141132
# create and initialize LearningRate Variable
142133
lr = scope.var('LearningRate').get_tensor()
@@ -158,19 +149,22 @@ def check_with_place(self, place):
158149

159150
# get and compare result
160151
param_out_np_array = np.array(param_out)
161-
velocity_out_np_array = np.array(velocity_out_tensor)
152+
velocity_out_np_array = np.array(velocity_out)
162153

163154
# TODO(dzh): add a more suitable general numpy interface
164155
# for sparse update.
165-
_velocity_out = mu * velocity_np_array + grad_np_array
166-
_param = param_array[rows]
156+
_grad_np_array = np.full((height, row_numel), 0.0).astype("float32")
157+
for i in range(len(rows)):
158+
_grad_np_array[rows[i]] = grad_np_array[i]
159+
_velocity_out = mu * velocity_np_array + _grad_np_array
160+
_param = param_array
167161
if use_nesterov:
168-
_param_out = _param - grad_np_array * lr_array - \
169-
_velocity_out * mu * lr_array
162+
_param_out = _param - (_grad_np_array + _velocity_out * mu
163+
) * lr_array
170164
else:
171-
_param_out = _param - lr * _velocity_out
172-
self.assertTrue((_param_out == param_out_np_array[rows]).all())
165+
_param_out = _param - lr_array * _velocity_out
173166
self.assertTrue((_velocity_out == velocity_out_np_array).all())
167+
self.assertTrue((_param_out == param_out_np_array).all())
174168

175169
def init_kernel(self):
176170
pass

0 commit comments

Comments (0)