@@ -7950,6 +7950,119 @@ def set_vocab(self):
         self.gguf_writer.add_chat_template(chat_template)


+@ModelBase.register("GptOssForCausalLM")
+class GptOssModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.GPT_OSS
+
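+    # MXFP4 packs 32 FP4 (E2M1) values per block, sharing a single E8M0 scale byte,
+    # so each block is 16 data bytes (two nibbles per byte) plus 1 scale byte.
+    # The checkpoint stores the packed nibbles in a different order than ggml's MXFP4
+    # layout, so the helper below reorders them; e.g. the first "swap nibbles" step
+    # turns a byte 0xAB (hi nibble 0xA, lo nibble 0xB) into 0xBA.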
+    def transform_nibble_layout(self, tensor):
+        assert tensor.dtype == torch.uint8
+        assert tensor.shape[-1] == 16
+        # swap nibbles
+        t_lo = tensor & 0x0F
+        t_hi = tensor & 0xF0
+        t_swapped = (t_lo << 4) | (t_hi >> 4)
+        tensor = t_swapped
+        # transform aaaa...bbbb... to abababab...
+        blk_a, blk_b = tensor.chunk(2, dim=-1)
+        # get a_
+        blk_a0 = (blk_a & 0xF0).view(-1, 1)
+        blk_a1 = (blk_a << 4).view(-1, 1)
+        blk_a = torch.stack((blk_a0, blk_a1), dim=2).view(tensor.shape)
+        # get _b
+        blk_b0 = (blk_b >> 4).view(-1, 1)
+        blk_b1 = (blk_b & 0x0F).view(-1, 1)
+        blk_b = torch.stack((blk_b0, blk_b1), dim=2).view(tensor.shape)
+        # swap once more
+        out = blk_a | blk_b
+        out_h = out & 0xF0
+        out_l = out & 0x0F
+        out = (out_h >> 4) | (out_l << 4)
+        return out
+
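+    # Repack one expert tensor into ggml's MXFP4 block layout: the E8M0 scale byte is
+    # prepended to the 16 reordered data bytes, giving 17-byte blocks, and the result is
+    # written pre-quantized via raw_dtype. Each block encodes 32 elements, hence the
+    # "* 32" when reporting the logical shape.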
+    def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
+        assert blocks.dtype == torch.uint8
+        assert scales.dtype == torch.uint8
+        scales = scales.unsqueeze(-1)
+        assert len(blocks.shape) == 4
+        assert len(scales.shape) == 4
+        blocks = self.transform_nibble_layout(blocks)
+        new_data = torch.concat((scales, blocks), dim=-1)
+        new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
+        logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
+        # flatten last dim
+        new_data = new_data.view(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
+        new_data = new_data.numpy()
+        self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
+
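+    # The expert weights ship as separate *_blocks (packed FP4 data) and *_scales (E8M0
+    # exponent) tensors, and the fused gate_up_proj interleaves gate and up along dim 1,
+    # so it is split with ::2 / 1::2 before repacking. The repacked tensors are written
+    # to the gguf writer directly, which is why nothing is yielded back to the caller.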
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        blocks0: Tensor = torch.zeros(1)
+        blocks1: Tensor = torch.zeros(1)
+        found_mxfp4_tensors = False
+        # we assume that tensors are loaded in the correct order
+        for name, data_torch in self.get_tensors():
+            if "mlp.experts.down_proj_blocks" in name:
+                blocks0 = data_torch
+            elif "mlp.experts.down_proj_scales" in name:
+                new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
+                self.repack_mxfp4(new_name, blocks0, data_torch)
+                found_mxfp4_tensors = True
+            elif "mlp.experts.gate_up_proj_blocks" in name:
+                blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
+            elif "mlp.experts.gate_up_proj_scales" in name:
+                scales0, scales1 = data_torch[:, ::2, :], data_torch[:, 1::2, :]
+                new_name_gate = self.map_tensor_name(name.replace("gate_up_proj_scales", "gate_proj.weight"))
+                new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
+                self.repack_mxfp4(new_name_gate, blocks0, scales0)
+                self.repack_mxfp4(new_name_up, blocks1, scales1)
+                found_mxfp4_tensors = True
+        if not found_mxfp4_tensors:
+            raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using an MXFP4 model.")
+        return []
+
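+    # Remaining (non-quantized) tensors are handled here: attention sinks get a .weight
+    # suffix so they map to a GGUF name, the fused gate_up_proj bias is de-interleaved
+    # into separate gate/up biases, and the expert block/scale tensors return [] because
+    # they were already written by generate_extra_tensors() above.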
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if "sinks" in name:
+            name += ".weight"
+
+        # correct naming for down_proj
+        if "down_proj" in name:
+            if name.endswith("_bias"):
+                name = name.replace("down_proj_bias", "down_proj.bias")
+            else:
+                return []
+
+        # split the gate_up into gate and up
+        if "gate_up_proj" in name:
+            if name.endswith("_bias"):
+                name_up = name.replace("gate_up_proj_bias", "up_proj.bias")
+                name_gate = name.replace("gate_up_proj_bias", "gate_proj.bias")
+                gate_proj_bias, up_proj_bias = data_torch[..., ::2], data_torch[..., 1::2]
+                return [
+                    (self.map_tensor_name(name_gate), gate_proj_bias),
+                    (self.map_tensor_name(name_up), up_proj_bias)
+                ]
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
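+    # GPT-OSS configs are expected to provide YaRN rope scaling; the assert below makes
+    # that explicit, and original_max_position_embeddings falls back to 4096 when the
+    # config omits it.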
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+        self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size"])
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        rope_type = rope_scaling.get("rope_type", rope_scaling.get("type"))
+        assert rope_type == "yarn", f"GPT-OSS only supports yarn rope scaling, got {rope_type}"
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+        self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+        self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling.get("original_max_position_embeddings", 4096))
+
+
 @ModelBase.register("Lfm2ForCausalLM")
 @ModelBase.register("LFM2ForCausalLM")
 class LFM2Model(TextModel):
@@ -8089,6 +8202,7 @@ class LazyTorchTensor(gguf.LazyBase):
     _dtype_map: dict[torch.dtype, type] = {
         torch.float16: np.float16,
         torch.float32: np.float32,
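+        # uint8 is presumably needed so lazy tensors holding the packed MXFP4 bytes
+        # can be materialized as numpy arrays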
+        torch.uint8: np.uint8,
     }

     # used for safetensors slices