diff --git a/paddlenlp/transformers/convbert/modeling.py b/paddlenlp/transformers/convbert/modeling.py
index 7c9dfd3a1bd0..926046bcafa9 100644
--- a/paddlenlp/transformers/convbert/modeling.py
+++ b/paddlenlp/transformers/convbert/modeling.py
@@ -322,6 +322,7 @@ def __init__(self, hidden_size, hidden_act):
         self.act = get_activation(hidden_act)
 
     def forward(self, discriminator_hidden_states):
+        discriminator_hidden_states = discriminator_hidden_states[0]
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = self.act(hidden_states)
         logits = self.dense_prediction(hidden_states).squeeze()
diff --git a/paddlenlp/transformers/rembert/modeling.py b/paddlenlp/transformers/rembert/modeling.py
index c4697253e7ff..80ef7bf275ef 100644
--- a/paddlenlp/transformers/rembert/modeling.py
+++ b/paddlenlp/transformers/rembert/modeling.py
@@ -177,6 +177,7 @@ def __init__(self, config: RemBertConfig):
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
 
     def forward(self, hidden_states, input_tensor):
+        hidden_states = hidden_states[0]
         hidden_states = self.dense(hidden_states)
         hidden_states = self.dropout(hidden_states)
         hidden_states = self.layer_norm(hidden_states + input_tensor)
diff --git a/paddlenlp/utils/optimizer.py b/paddlenlp/utils/optimizer.py
index 277a970b37f0..c833ea665f37 100644
--- a/paddlenlp/utils/optimizer.py
+++ b/paddlenlp/utils/optimizer.py
@@ -144,7 +144,7 @@ def adamw_python(
         mom1 = beta1 * mom1 + (1.0 - beta1) * grad
         mom2 = beta2 * mom2 + (1.0 - beta2) * (grad * grad).mean()
         denom = mom2.sqrt() / (1.0 - beta2_pow).sqrt() + epsilon
-        p += (moment1 / denom) * (-(lr / (1.0 - beta1_pow)))
+        p += (mom1 / denom) * (-(lr / (1.0 - beta1_pow)))
         if master_weight is not None:
             master_weight[:] = p
             param[:] = p.astype(param.dtype)
diff --git a/requirements.txt b/requirements.txt
index 7f4e42796606..00efca7710db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,8 @@ colorama
 seqeval
 dill<0.3.5
 multiprocess<=0.70.12.2
-datasets >= 2.0.0
+datasets==3.6.0
+pyarrow==20.0.0
 tqdm
 paddlefsl
 sentencepiece
@@ -21,7 +22,7 @@ typer
 rich
 safetensors
 fast_dataindex>=0.1.1 ; platform_system == "Linux"
-aistudio-sdk>=0.1.3
+aistudio-sdk>=0.3.0
 jinja2
 regex
 numpy<=1.26.4
@@ -30,3 +31,4 @@ ml_dtypes
 tokenizers<=0.20.3; python_version<="3.8"
 tokenizers>=0.21,<0.22; python_version>"3.8"
 omegaconf
+einops>=0.7.0
\ No newline at end of file
diff --git a/tests/transformers/bloom/test_modeling.py b/tests/transformers/bloom/test_modeling.py
index 991059df16f4..5dcc77352a0d 100644
--- a/tests/transformers/bloom/test_modeling.py
+++ b/tests/transformers/bloom/test_modeling.py
@@ -505,6 +505,10 @@ def test_inputs_embeds(self):
 
         with paddle.no_grad():
             embeds_output = model(**inputs)
 
+        if isinstance(ids_output, tuple):
+            ids_output = ids_output[0]
+        if isinstance(embeds_output, tuple):
+            embeds_output = embeds_output[0]
         self.assertTrue(paddle.allclose(ids_output, embeds_output, rtol=1e-4, atol=1e-4))
 
diff --git a/tests/transformers/mbart/test_modeling.py b/tests/transformers/mbart/test_modeling.py
index 2a77613af74c..eb1e3b59cd95 100644
--- a/tests/transformers/mbart/test_modeling.py
+++ b/tests/transformers/mbart/test_modeling.py
@@ -258,6 +258,10 @@ def test_inputs_embeds_for_mbart(self):
 
         with paddle.no_grad():
             embeds_output = model(**inputs)
 
+        if isinstance(ids_output, tuple):
+            ids_output = ids_output[0]
+        if isinstance(embeds_output, tuple):
+            embeds_output = embeds_output[0]
         self.assertTrue(paddle.allclose(ids_output, embeds_output, rtol=1e-4, atol=1e-4))
 