@@ -59,6 +59,8 @@ class GgufIntegrationTests(unittest.TestCase):
     starcoder2_model_id = "QuantFactory/starcoder2-3b-GGUF"
     starcoder2_fp16_model_id = "brittlewis12/starcoder2-3b-GGUF"
     starcoder2_original_model_id = "bigcode/starcoder2-3b"
+    mamba_original_model_id = "state-spaces/mamba-2.8b-hf"
+    mamba_model_id = "jpodivin/mamba-2.8b-hf-GGUF"

     # standard quants
     q4_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
@@ -102,6 +104,8 @@ class GgufIntegrationTests(unittest.TestCase):
     q6_k_gpt2_xl_model_id = "gpt2-xl.Q6_K.gguf"
     q6_k_starcoder2_model_id = "starcoder2-3b.Q6_K.gguf"
     fp16_starcoder2_gguf_model_id = "starcoder2-3b.fp16.gguf"
+    q6_k_mamba_model_id = "ggml-model-Q6_K.gguf"
+    fp16_mamba_model_id = "ggml-model-f16.gguf"

     example_text = "Hello"

@@ -573,6 +577,8 @@ def test_gpt2_weights_conversion_fp16(self):
             if layer_name in quantized_state_dict:
                 self.assertTrue(original_params.shape == quantized_state_dict[layer_name].shape)
                 torch.testing.assert_close(original_params, quantized_state_dict[layer_name])
+            else:
+                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")

     def test_gpt2_xl_Q6_K(self):
         tokenizer = AutoTokenizer.from_pretrained(self.gpt2_xl_model_id, gguf_file=self.q6_k_gpt2_xl_model_id)
@@ -639,6 +645,8 @@ def test_falcon7b_weights_conversion_fp16(self):
             if layer_name in quantized_state_dict:
                 self.assertTrue(original_params.shape == quantized_state_dict[layer_name].shape)
                 torch.testing.assert_close(original_params, quantized_state_dict[layer_name])
+            else:
+                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")

     def test_stablelm_q4_k_m(self):
         model = AutoModelForCausalLM.from_pretrained(
@@ -708,6 +716,8 @@ def test_stablelm_weights_conversion_fp16(self):
             if layer_name in converted_state_dict:
                 self.assertTrue(original_params.shape == converted_state_dict[layer_name].shape)
                 torch.testing.assert_close(original_params, converted_state_dict[layer_name])
+            else:
+                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")

     def test_starcoder2_weights_conversion_fp16(self):
         original_model = AutoModelForCausalLM.from_pretrained(
@@ -727,10 +737,11 @@ def test_starcoder2_weights_conversion_fp16(self):
         original_state_dict = original_model.state_dict()

         for layer_name, original_params in original_state_dict.items():
-            if layer_name in converted_state_dict and layer_name != "lm_head.weight":
-                # quantized models do not contain "lm_head.weight" layer
+            if layer_name in converted_state_dict:
                 self.assertTrue(original_params.shape == converted_state_dict[layer_name].shape)
                 torch.testing.assert_close(original_params, converted_state_dict[layer_name])
+            else:
+                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")

     def test_starcoder2_q6_k(self):
         example_function_text = "def print_hello_world():"
@@ -748,6 +759,47 @@ def test_starcoder2_q6_k(self):
         EXPECTED_TEXT = 'def print_hello_world():\n    print("Hello World")\n\ndef print'
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

+    def test_mamba_weights_conversion_fp16(self):
+        original_model = AutoModelForCausalLM.from_pretrained(
+            self.mamba_original_model_id,
+            torch_dtype=torch.float16,
+        )
+
+        converted_model = AutoModelForCausalLM.from_pretrained(
+            self.mamba_model_id,
+            gguf_file=self.fp16_mamba_model_id,
+            torch_dtype=torch.float16,
+        )
+
+        converted_state_dict = converted_model.state_dict()
+        original_state_dict = original_model.state_dict()
+
+        for layer_name, original_params in original_state_dict.items():
+            if layer_name in converted_state_dict:
+                self.assertTrue(original_params.shape == converted_state_dict[layer_name].shape)
+                if "mixer.A_log" in layer_name:
+                    # the GGUF checkpoint stores A in exponentiated form; reversing it with
+                    # np.log(-weights) leaves small numerical differences, so loosen the tolerance
+                    torch.testing.assert_close(original_params, converted_state_dict[layer_name], atol=1e-3, rtol=1e-3)
+                else:
+                    torch.testing.assert_close(original_params, converted_state_dict[layer_name])
+            else:
+                raise ValueError(f"Layer {layer_name} is not present in the GGUF model")
+
+    def test_mamba_q6_k(self):
+        model = AutoModelForCausalLM.from_pretrained(
+            self.mamba_model_id,
+            gguf_file=self.q6_k_mamba_model_id,
+            torch_dtype=torch.float16,
+        )
+
+        tokenizer = AutoTokenizer.from_pretrained(self.mamba_model_id, gguf_file=self.q6_k_mamba_model_id)
+        text = tokenizer(self.example_text, return_tensors="pt")["input_ids"]
+        out = model.generate(text, max_new_tokens=10)
+
+        EXPECTED_TEXT = "Hello,I answerthe question.\n\nA"
+        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
     def test_tokenization_xnli(self):
         import tqdm
         from datasets import load_dataset