@@ -7810,7 +7810,6 @@ class GptOssModel(TextModel):
78107810 def transform_nibble_layout (self , tensor ):
78117811 assert tensor .dtype == torch .uint8
78127812 assert tensor .shape [- 1 ] == 16
7813- tensor = tensor .clone ().to (device = "cpu" )
78147813 # swap nibbles
78157814 t_lo = tensor & 0x0F
78167815 t_hi = tensor & 0xF0
@@ -7839,15 +7838,13 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
78397838 scales = scales .unsqueeze (- 1 )
78407839 assert len (blocks .shape ) == 4
78417840 assert len (scales .shape ) == 4
7842- # convert to numpy
7843- scales = scales .to_eager (scales ).numpy ()
7844- blocks = blocks .to_eager (blocks )
7845- blocks = self .transform_nibble_layout (blocks ).numpy ()
7846- new_data = np .concatenate ([scales , blocks ], axis = - 1 )
7841+ blocks = self .transform_nibble_layout (blocks )
7842+ new_data = torch .concat ((scales , blocks ), dim = - 1 )
78477843 new_shape = [new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * 32 ]
78487844 logger .info (f"Repacked { new_name } with shape { new_shape } and quantization MXFP4" )
78497845 # flatten last dim
7850- new_data = new_data .reshape (new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * new_data .shape [3 ])
7846+ new_data = new_data .view (new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * new_data .shape [3 ])
7847+ new_data = new_data .numpy ()
78517848 self .gguf_writer .add_tensor (new_name , new_data , raw_dtype = gguf .GGMLQuantizationType .MXFP4 )
78527849
78537850 def generate_extra_tensors (self ) -> Iterable [tuple [str , Tensor ]]:
0 commit comments