Commit 409eff7

Cherry adam errorinfo (#27169)
* add check for sparse parameters with weight_decay
* move sparse check to adam.py
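
The check mirrors the unit test added below: in dygraph mode, pairing a sparse Embedding with a non-None weight_decay now makes Adam raise a RuntimeError. A minimal sketch of the failing combination (assuming the Paddle 2.0 dygraph API used throughout this diff):

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.arange(0, 10).reshape((10, 1)).astype(np.int64),
                     stop_gradient=False)
emb = paddle.nn.Embedding(10, 10, sparse=True)      # produces sparse gradients
adam = paddle.optimizer.Adam(0.001,
                             parameters=emb.parameters(),
                             weight_decay=0.01)      # rejected for sparse params

out = emb(x)
out.backward()
adam.step()   # RuntimeError after this commit: weight_decay must be None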
1 parent 834face commit 409eff7

2 files changed: +58 / -1 lines changed

python/paddle/fluid/tests/unittests/test_adam_op.py

Lines changed: 14 additions & 1 deletion
@@ -448,7 +448,6 @@ def test_adam_op_dygraph(self):
 
     def test_adam_op_with_state_dict(self):
 
-        import paddle
         paddle.disable_static()
         emb = paddle.nn.Embedding(10, 10)
 
@@ -517,6 +516,20 @@ def test_adam_op_invalid_input(self):
         adam = paddle.optimizer.Adam(
             0.1, epsilon=-1, parameters=linear.parameters())
 
+    def test_adam_op_with_sparse_input_and_weight_decay(self):
+
+        paddle.disable_static()
+        x_data = np.arange(0, 10).reshape((10, 1)).astype(np.int64)
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        emb = paddle.nn.Embedding(10, 10, sparse=True)
+        adam = paddle.optimizer.Adam(
+            0.001, parameters=emb.parameters(), weight_decay=0.01)
+
+        with self.assertRaises(RuntimeError):
+            out = emb(x)
+            out.backward()
+            adam.step()
+
 
 if __name__ == "__main__":
     unittest.main()
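
As the new error message suggests, leaving weight_decay at its default of None keeps sparse embedding training working; a hedged sketch of the supported setup:

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.arange(0, 10).reshape((10, 1)).astype(np.int64),
                     stop_gradient=False)
emb = paddle.nn.Embedding(10, 10, sparse=True)
# weight_decay stays at its default (None), so the new check does not fire.
adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters())

out = emb(x)
out.backward()
adam.step()
adam.clear_grad()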

python/paddle/optimizer/adam.py

Lines changed: 44 additions & 0 deletions
@@ -250,3 +250,47 @@ def _append_optimize_op(self, block, param_and_grad):
             stop_gradient=True)
 
         return adam_op
+
+    @framework.dygraph_only
+    def step(self):
+        """
+        Execute the optimizer and update parameters once.
+
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                import numpy as np
+                paddle.disable_static()
+                value = np.arange(26).reshape(2, 13).astype("float32")
+                a = paddle.to_tensor(value)
+                linear = paddle.nn.Linear(13, 5)
+                # This can be any optimizer supported by dygraph.
+                adam = paddle.optimizer.Adam(learning_rate = 0.01,
+                                             parameters = linear.parameters())
+                out = linear(a)
+                out.backward()
+                adam.step()
+                adam.clear_grad()
+        """
+        parameter_list = self._parameter_list
+        self._dtype = None
+        params_grads = []
+        for param in self._parameter_list:
+            if not param.trainable:
+                continue
+            if hasattr(
+                    param, "_is_sparse"
+            ) and param._is_sparse and self.regularization is not None:
+                raise RuntimeError(
+                    "Adam don't support weight_decay with sparse parameters, please set it to None."
+                )
+            if param._grad_ivar() is not None:
+                grad_var = param._grad_ivar()
+                params_grads.append((param, grad_var))
+
+        optimize_ops = self._apply_optimize(
+            loss=None, startup_program=None, params_grads=params_grads)