
Commit dbd8474

Fix: BLOOM tie_word_embeddings in GGUF (#35812)

* fix bloom ggml
* fix falcon output
* make style
1 parent 678bd7f commit dbd8474

2 files changed: +2 −2 lines

src/transformers/modeling_gguf_pytorch_utils.py (1 addition, 1 deletion)

@@ -400,7 +400,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
 
     # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
     # tie_word_embeddings is true otherwise false
-    exceptions = ["falcon"]
+    exceptions = ["falcon", "bloom"]
     parsed_parameters["config"]["tie_word_embeddings"] = (
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
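
For context, the check this hunk modifies can be read as a standalone heuristic. Below is a minimal sketch, assuming the `gguf` package's GGUFReader (the source of the loader's `reader` object); the function name is illustrative, not part of the patched module:

    # Minimal sketch of the tie_word_embeddings heuristic patched above;
    # assumes the `gguf` package's GGUFReader, function name is illustrative.
    from gguf import GGUFReader

    def infer_tie_word_embeddings(gguf_checkpoint_path: str, architecture: str) -> bool:
        # Architectures whose GGUF files keep an "output.weight" tensor even
        # though the HF model ties its embeddings; this commit adds "bloom"
        # alongside "falcon".
        exceptions = ["falcon", "bloom"]
        reader = GGUFReader(gguf_checkpoint_path)
        # No tensor named "output.weight" means there is no separate lm_head,
        # so the output projection must share weights with the input embeddings.
        no_lm_head = all(tensor.name != "output.weight" for tensor in reader.tensors)
        return no_lm_head or architecture in exceptions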

tests/quantization/ggml/test_ggml.py (1 addition, 1 deletion)

@@ -633,7 +633,7 @@ def test_falcon7b_q2_k(self):
         text = tokenizer(self.example_text, return_tensors="pt")["input_ids"].to(torch_device)
         out = model.generate(text, max_new_tokens=16)
 
-        EXPECTED_TEXT = 'Hello,\nI am trying to use the "get_post_meta"'
+        EXPECTED_TEXT = "Hello All,\nI am new to this forum.\nI am using the "
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
 
     @unittest.skip("The test causes a torch.OutOfMemoryError on the CI but it passes with enough memory")
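
The updated expectation reflects regenerating output from the falcon 7B Q2_K GGUF checkpoint after the loader change. A minimal sketch of the load-and-generate pattern the test follows, with a hypothetical repo id and GGUF filename (the test's actual checkpoint identifiers are not shown in this diff):

    # Minimal sketch of GGUF loading via transformers' gguf_file argument;
    # repo_id and gguf_file below are hypothetical placeholders.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo_id = "some-org/falcon-7b-gguf"  # hypothetical
    gguf_file = "falcon-7b-Q2_K.gguf"    # hypothetical

    tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_file)
    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_file)

    inputs = tokenizer("Hello", return_tensors="pt")["input_ids"]
    out = model.generate(inputs, max_new_tokens=16)
    print(tokenizer.decode(out[0], skip_special_tokens=True))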
