@@ -7950,6 +7950,119 @@ def set_vocab(self):
             self.gguf_writer.add_chat_template(chat_template)


+@ModelBase.register("GptOssForCausalLM")
+class GptOssModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.GPT_OSS
+
+    def transform_nibble_layout(self, tensor):
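+        # MXFP4 packs 32 FP4 values into 16 bytes (two nibbles per byte);
+        # this reorders the nibbles from the HF safetensors packing into the
+        # layout that ggml's MXFP4 blocks expect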
+        assert tensor.dtype == torch.uint8
+        assert tensor.shape[-1] == 16
+        # swap nibbles
+        t_lo = tensor & 0x0F
+        t_hi = tensor & 0xF0
+        t_swapped = (t_lo << 4) | (t_hi >> 4)
+        tensor = t_swapped
+        # transform aaaa...bbbb... to abababab...
+        blk_a, blk_b = tensor.chunk(2, dim=-1)
+        # get a_
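+        # view(-1, 1) + stack + view interleaves the two nibble streams
+        # element-wise along the last dimension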
+        blk_a0 = (blk_a & 0xF0).view(-1, 1)
+        blk_a1 = (blk_a << 4).view(-1, 1)
+        blk_a = torch.stack((blk_a0, blk_a1), dim=2).view(tensor.shape)
+        # get _b
+        blk_b0 = (blk_b >> 4).view(-1, 1)
+        blk_b1 = (blk_b & 0x0F).view(-1, 1)
+        blk_b = torch.stack((blk_b0, blk_b1), dim=2).view(tensor.shape)
+        # swap once more
+        out = blk_a | blk_b
+        out_h = out & 0xF0
+        out_l = out & 0x0F
+        out = (out_h >> 4) | (out_l << 4)
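+        # net effect (assuming HF packs element 2k in the low nibble of byte k):
+        # byte j now holds element j in its low nibble and element j+16 in its
+        # high nibble, i.e. the split lo/hi layout used by ggml quant blocks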
+        return out
+
+    def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
+        assert blocks.dtype == torch.uint8
+        assert scales.dtype == torch.uint8
+        scales = scales.unsqueeze(-1)
+        assert len(blocks.shape) == 4
+        assert len(scales.shape) == 4
+        blocks = self.transform_nibble_layout(blocks)
+        new_data = torch.concat((scales, blocks), dim=-1)
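+        # each block is now 17 bytes: one E8M0 scale byte followed by the
+        # 16 packed FP4 bytes, matching ggml's MXFP4 block layout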
+        new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
+        logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
+        # flatten last dim
+        new_data = new_data.view(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
+        new_data = new_data.numpy()
+        self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        blocks0: Tensor = torch.zeros(1)
+        blocks1: Tensor = torch.zeros(1)
+        found_mxfp4_tensors = False
+        # we assume that tensors are loaded in the correct order
+        for name, data_torch in self.get_tensors():
+            if "mlp.experts.down_proj_blocks" in name:
+                blocks0 = data_torch
+            elif "mlp.experts.down_proj_scales" in name:
+                new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
+                self.repack_mxfp4(new_name, blocks0, data_torch)
+                found_mxfp4_tensors = True
+            elif "mlp.experts.gate_up_proj_blocks" in name:
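+                # gate and up rows are interleaved along dim 1:
+                # even rows -> gate_proj, odd rows -> up_proj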
+                blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
+            elif "mlp.experts.gate_up_proj_scales" in name:
+                scales0, scales1 = data_torch[:, ::2, :], data_torch[:, 1::2, :]
+                new_name_gate = self.map_tensor_name(name.replace("gate_up_proj_scales", "gate_proj.weight"))
+                new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
+                self.repack_mxfp4(new_name_gate, blocks0, scales0)
+                self.repack_mxfp4(new_name_up, blocks1, scales1)
+                found_mxfp4_tensors = True
+        if not found_mxfp4_tensors:
+            raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using an MXFP4 model.")
+        return []
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
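+        # attention sink tensors carry no suffix in the checkpoint; append
+        # .weight so map_tensor_name can resolve them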
+        if "sinks" in name:
+            name += ".weight"
+
+        # correct naming for down_proj
+        if "down_proj" in name:
+            if name.endswith("_bias"):
+                name = name.replace("down_proj_bias", "down_proj.bias")
+            else:
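+                # the MXFP4 blocks/scales were already emitted by generate_extra_tensors()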
+                return []
+
+        # split the gate_up into gate and up
+        if "gate_up_proj" in name:
+            if name.endswith("_bias"):
+                name_up = name.replace("gate_up_proj_bias", "up_proj.bias")
+                name_gate = name.replace("gate_up_proj_bias", "gate_proj.bias")
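+                # biases are interleaved the same way as the weights:
+                # even entries -> gate, odd entries -> up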
+                gate_proj_bias, up_proj_bias = data_torch[..., ::2], data_torch[..., 1::2]
+                return [
+                    (self.map_tensor_name(name_gate), gate_proj_bias),
+                    (self.map_tensor_name(name_up), up_proj_bias)
+                ]
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+        self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size"])
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        rope_type = rope_scaling.get("rope_type", rope_scaling.get("type"))
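+        # config.json is expected to carry something like
+        # "rope_scaling": {"rope_type": "yarn", "factor": ..., "original_max_position_embeddings": ...}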
+        assert rope_type == "yarn", f"GPT-OSS only supports yarn rope scaling, got {rope_type}"
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+        self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+        self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling.get("original_max_position_embeddings", 4096))
+
+
 @ModelBase.register("Lfm2ForCausalLM")
 @ModelBase.register("LFM2ForCausalLM")
 class LFM2Model(TextModel):
@@ -8089,6 +8202,7 @@ class LazyTorchTensor(gguf.LazyBase):
     _dtype_map: dict[torch.dtype, type] = {
         torch.float16: np.float16,
         torch.float32: np.float32,
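+        # uint8 is needed to lazily handle the raw MXFP4 block/scale tensors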
+        torch.uint8: np.uint8,
     }

     # used for safetensors slices