diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index 9a2c937e51d1..d3e9c8e03a2b 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -687,9 +687,9 @@ def forward( inputs_embeds = nn.functional.gelu(self.conv2(inputs_embeds)) inputs_embeds = inputs_embeds.permute(0, 2, 1) - embed_pos = self.embed_positions.weight + all_positions = torch.arange(self.embed_positions.num_embeddings, device=inputs_embeds.device) - hidden_states = inputs_embeds + embed_pos + hidden_states = inputs_embeds + self.embed_positions(all_positions) hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) encoder_states = () if output_hidden_states else None diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index dbb241f5ad4b..1b4641f5d49b 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -3356,22 +3356,6 @@ def test_forward_pass_weighted_layer_sum(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model_forward(*config_and_inputs, use_weighted_layer_sum=True) - @unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.") - def test_cpu_offload(self): - pass - - @unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.") - def test_disk_offload_bin(self): - pass - - @unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.") - def test_disk_offload_safetensors(self): - pass - - @unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.") - def test_model_parallelism(self): - pass - @unittest.skip(reason="Not applicable for an encoder-only acoustic model") def test_inputs_embeds(self): # input embeds is meaningless for an encoder-only acoustic model