@@ -8029,7 +8029,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
80298029 if "down_proj" in name :
80308030 if name .endswith ("_bias" ):
80318031 name = name .replace ("down_proj_bias" , "down_proj.bias" )
8032+ elif "_blocks" not in name and "_scales" not in name :
8033+ logger .warning (f"{ name } is not in MXFP4, performance may be degraded" )
8034+ name = name .replace ("down_proj" , "down_proj.weight" )
8035+ data_torch = data_torch .transpose (- 1 , - 2 )
80328036 else :
8037+ # otherwise, it should already be repacked to ggml MXFP4 format
80338038 return []
80348039
80358040 # split the gate_up into gate and up
@@ -8042,7 +8047,18 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
80428047 (self .map_tensor_name (name_gate ), gate_proj_bias ),
80438048 (self .map_tensor_name (name_up ), up_proj_bias )
80448049 ]
8050+ elif "_blocks" not in name and "_scales" not in name :
8051+ logger .warning (f"{ name } is not in MXFP4, performance may be degraded" )
8052+ name_up = name .replace ("gate_up_proj" , "up_proj.weight" )
8053+ name_gate = name .replace ("gate_up_proj" , "gate_proj.weight" )
8054+ data_torch = data_torch .transpose (- 1 , - 2 )
8055+ gate_proj_weight , up_proj_weight = data_torch [:, ::2 , :], data_torch [:, 1 ::2 , :]
8056+ return [
8057+ (self .map_tensor_name (name_gate ), gate_proj_weight ),
8058+ (self .map_tensor_name (name_up ), up_proj_weight )
8059+ ]
80458060 else :
8061+ # otherwise, it should already be repacked to ggml MXFP4 format
80468062 return []
80478063
80488064 return [(self .map_tensor_name (name ), data_torch )]
0 commit comments