Skip to content

Commit 6b9302a

Browse files
authored
[Cherry-Pick] Fix bug where embedding can't be processed correctly in reducer (#29490)
* fix the bug of reducer in embedding
1 parent d8e1e50 commit 6b9302a

File tree

2 files changed

+85
-13
lines changed

2 files changed

+85
-13
lines changed

python/paddle/fluid/dygraph/parallel.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
from paddle.fluid.dygraph import parallel_helper
2525
from paddle.fluid.dygraph import to_variable, no_grad
2626
from paddle.utils import deprecated
27-
from paddle.fluid.dygraph import nn
2827
import warnings
28+
import paddle
2929

3030
__all__ = ["prepare_context", "ParallelEnv", "DataParallel"]
3131

@@ -419,9 +419,13 @@ def init_reducer(self):
419419
# NOTE(shenliang03): Here we can only use the attributes to judge whether
420420
# parameter is sparse(or SelectedRows). The reason is that the sparse message
421421
# can't be obtained when bp hasn't happened yet. So if layer supports sparse parameter,
422-
# we should add the layer here like "nn.Embedding".
422+
# we should add the layer here like "paddle.nn.layer.common.Embedding".
423423
def check_layer_sparse(sublayer):
    """Report whether ``sublayer`` is an embedding layer flagged as sparse.

    The decision is made from the layer's own attribute rather than from
    its gradients. Layers that are not one of the known embedding classes
    are reported as not sparse.
    """
    # (layer class, name of the attribute holding its sparse flag), checked
    # in order.
    # NOTE(shenliang03): The paddle.fluid.dygraph.Embedding entry is kept for
    # compatibility. If paddle.fluid.dygraph.Embedding is removed in the
    # future, that entry will also be removed here.
    sparse_flag_by_layer = (
        (paddle.nn.layer.common.Embedding, "_sparse"),
        (paddle.fluid.dygraph.Embedding, "_is_sparse"),
    )
    for layer_cls, flag_name in sparse_flag_by_layer:
        if isinstance(sublayer, layer_cls):
            return getattr(sublayer, flag_name)
    return False
427431

python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,60 @@
1515
from __future__ import print_function
1616

1717
import numpy as np
18-
1918
import paddle
20-
import paddle.fluid as fluid
21-
from paddle.fluid.dygraph.nn import Embedding
22-
from paddle.fluid.dygraph.base import to_variable
2319

2420
from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
25-
from parallel_dygraph_sparse_embedding import SimpleNet, fake_sample_reader, TestSparseEmbedding
21+
from paddle.nn import Layer, Embedding
22+
paddle.set_default_dtype("float64")
23+
24+
25+
class SimpleNet(Layer):
    """Embedding lookup followed by a softmax projection and cross-entropy loss.

    Args:
        hidden_size: Width of each embedding vector; also the first
            dimension of the projection weight.
        vocab_size: Number of embedding rows and number of output classes.
        num_steps: Second dimension used when the per-sample loss is
            reshaped in ``forward``.
        init_scale: Bound of the symmetric uniform initializer used for
            every parameter.
        is_sparse: Whether the embedding layer is created with
            ``sparse=True``.
        dtype: Dtype of the projection weight and bias.
    """

    def __init__(self,
                 hidden_size,
                 vocab_size,
                 num_steps=20,
                 init_scale=0.1,
                 is_sparse=False,
                 dtype="float64"):
        super(SimpleNet, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.init_scale = init_scale
        self.num_steps = num_steps
        # Honour the caller's ``is_sparse`` flag instead of always building
        # a sparse embedding.
        self.embedding = Embedding(
            self.vocab_size,
            self.hidden_size,
            sparse=is_sparse,
            weight_attr=paddle.ParamAttr(
                name='embedding_param',
                initializer=paddle.nn.initializer.Uniform(
                    low=-init_scale, high=init_scale)))
        self.softmax_weight = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.hidden_size, self.vocab_size],
            dtype=dtype,
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))
        self.softmax_bias = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.vocab_size],
            dtype=dtype,
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))

    def forward(self, input, label):
        """Return the loss for one batch of token ids and labels."""
        x_emb = self.embedding(input)
        fc = paddle.matmul(x_emb, self.softmax_weight)
        fc = paddle.add(fc, self.softmax_bias)
        # Flatten to one row of logits per prediction before the loss.
        projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        # Average over the first axis, then sum the per-step means.
        loss = paddle.reshape(loss, shape=[-1, self.num_steps])
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)

        return loss
71+
2672

2773
# global configs
2874
batch_size = 4
@@ -33,24 +79,46 @@
3379
init_scale = 0.1
3480

3581

36-
class TestSparseEmbeddingFP64(TestSparseEmbedding):
82+
def fake_sample_reader():
    """Build a reader that yields ``batch_num`` synthetic (input, label) pairs.

    Each input is ``0 .. num_steps - 1`` and each label is the same range
    shifted up by one, both as int64 arrays.
    """

    def __reader__():
        for _ in range(batch_num):
            inputs = np.arange(num_steps).astype('int64')
            labels = np.arange(1, 1 + num_steps).astype('int64')
            yield inputs, labels

    return __reader__
90+
91+
92+
class TestSparseEmbeddingFP64(TestParallelDyGraphRunnerBase):
    """Runner for the sparse-embedding data-parallel test.

    Builds ``SimpleNet`` with a sparse embedding, a batched synthetic
    reader and an SGD optimizer, and computes the loss for one batch.
    """

    def get_model(self):
        """Return the ``(model, train_reader, optimizer)`` triple for the test."""
        model = SimpleNet(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_steps=num_steps,
            init_scale=init_scale,
            is_sparse=True)

        train_reader = paddle.batch(
            fake_sample_reader(), batch_size=batch_size, drop_last=True)

        optimizer = paddle.optimizer.SGD(learning_rate=0.001,
                                         parameters=model.parameters())

        return model, train_reader, optimizer

    def run_one_loop(self, model, optimizer, batch):
        """Convert one batch to tensors and return the model's loss for it."""
        # Each sample holds ``num_steps`` ids. Use the module-level constant
        # rather than a literal so the reshape stays in step with the reader
        # and with the reshape below.
        x_data = np.array(
            [x[0].reshape(num_steps) for x in batch]).astype('int64')
        y_data = np.array(
            [x[1].reshape(num_steps) for x in batch]).astype('int64')
        x_data = x_data.reshape((-1, num_steps, 1))
        y_data = y_data.reshape((-1, 1))

        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)

        dy_loss = model(x, y)

        return dy_loss
121+
54122

55123
# Script entry point: hand the test class to the shared runner when this
# file is executed directly.
if __name__ == "__main__":
    runtime_main(TestSparseEmbeddingFP64)

0 commit comments

Comments
 (0)