
Commit 65db861

batch bug fix of decoder_weight shape in transformers models (#608)

* fix a bug in the weight shape in the BERT model
* batch-fix the same decoder_weight shape bug in the remaining models
Parent: aa70c63
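
Why the one transposed pair of dimensions matters: when embedding_weights is passed in (weight tying), the decoder weight is the [vocab_size, hidden_size] embedding table, and the head presumably projects hidden states onto the vocabulary with a transposed matmul — the tied path only works that way. A freshly created parameter therefore has to use the same layout. A minimal sketch under that assumption (toy sizes, not PaddleNLP source):

    import paddle

    hidden_size, vocab_size, batch, seq = 8, 30, 2, 4

    # Word-embedding table, as passed in via embedding_weights when tying.
    embedding_weights = paddle.randn([vocab_size, hidden_size])

    hidden_states = paddle.randn([batch, seq, hidden_size])

    # transpose_y=True multiplies by the weight's transpose, so only a
    # [vocab_size, hidden_size] weight yields [batch, seq, vocab_size] logits.
    logits = paddle.matmul(hidden_states, embedding_weights, transpose_y=True)
    assert logits.shape == [batch, seq, vocab_size]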

5 files changed (+5 −5 lines)


paddlenlp/transformers/bert/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -501,7 +501,7 @@ def __init__(self,
         self.activation = getattr(nn.functional, activation)
         self.layer_norm = nn.LayerNorm(hidden_size)
         self.decoder_weight = self.create_parameter(
-            shape=[hidden_size, vocab_size],
+            shape=[vocab_size, hidden_size],
             dtype=self.transform.weight.dtype,
             is_bias=False) if embedding_weights is None else embedding_weights
         self.decoder_bias = self.create_parameter(

paddlenlp/transformers/bigbird/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -569,7 +569,7 @@ def __init__(self,
         self.activation = getattr(nn.functional, activation)
         self.layer_norm = nn.LayerNorm(hidden_size, epsilon=1e-12)
         self.decoder_weight = self.create_parameter(
-            shape=[hidden_size, vocab_size],
+            shape=[vocab_size, hidden_size],
             dtype=self.transform.weight.dtype,
             is_bias=False) if embedding_weights is None else embedding_weights
         self.decoder_bias = self.create_parameter(

paddlenlp/transformers/ernie/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -645,7 +645,7 @@ def __init__(self,
         self.activation = getattr(nn.functional, activation)
         self.layer_norm = nn.LayerNorm(hidden_size)
         self.decoder_weight = self.create_parameter(
-            shape=[hidden_size, vocab_size],
+            shape=[vocab_size, hidden_size],
             dtype=self.transform.weight.dtype,
             is_bias=False) if embedding_weights is None else embedding_weights
         self.decoder_bias = self.create_parameter(

paddlenlp/transformers/gpt/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -858,7 +858,7 @@ class GPTLMHead(nn.Layer):
     def __init__(self, hidden_size, vocab_size, embedding_weights=None):
         super(GPTLMHead, self).__init__()
         self.decoder_weight = self.create_parameter(
-            shape=[hidden_size, vocab_size],
+            shape=[vocab_size, hidden_size],
             dtype=paddle.get_default_dtype(),
             is_bias=True) if embedding_weights is None else embedding_weights

paddlenlp/transformers/unified_transformer/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -372,7 +372,7 @@ def __init__(self,
         self.activation = getattr(nn.functional, activation)
         self.layer_norm = nn.LayerNorm(hidden_size)
         self.decoder_weight = self.create_parameter(
-            shape=[hidden_size, vocab_size],
+            shape=[vocab_size, hidden_size],
             dtype=self.transform.weight.dtype,
             is_bias=False) if embedding_weights is None else embedding_weights
         self.decoder_bias = self.create_parameter(
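
The same one-line fix repeats in all five heads. A self-contained toy layer (hypothetical class, not part of PaddleNLP) sketching the corrected pattern, so the tied and untied paths produce identically shaped logits:

    import paddle
    import paddle.nn as nn

    class TinyLMHead(nn.Layer):
        def __init__(self, hidden_size, vocab_size, embedding_weights=None):
            super(TinyLMHead, self).__init__()
            # After the fix, a freshly created weight shares the tied
            # embedding table's [vocab_size, hidden_size] layout.
            self.decoder_weight = self.create_parameter(
                shape=[vocab_size, hidden_size],
                dtype=paddle.get_default_dtype(),
                is_bias=False) if embedding_weights is None else embedding_weights

        def forward(self, hidden_states):
            # Same transposed projection either way, so tied and
            # untied heads agree on the output shape.
            return paddle.matmul(hidden_states, self.decoder_weight,
                                 transpose_y=True)

    head = TinyLMHead(hidden_size=8, vocab_size=30)
    logits = head(paddle.randn([2, 4, 8]))
    assert logits.shape == [2, 4, 30]

With the [vocab_size, hidden_size] layout, a head trained without tying also stays shape-compatible with checkpoints that share the embedding table.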
