Skip to content

Commit 4cf69df

Browse files
committed
properly use lazy tensor
1 parent 04cfb6d commit 4cf69df

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

convert_hf_to_gguf.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7810,7 +7810,6 @@ class GptOssModel(TextModel):
78107810
def transform_nibble_layout(self, tensor):
78117811
assert tensor.dtype == torch.uint8
78127812
assert tensor.shape[-1] == 16
7813-
tensor = tensor.clone().to(device="cpu")
78147813
# swap nibbles
78157814
t_lo = tensor & 0x0F
78167815
t_hi = tensor & 0xF0
@@ -7839,15 +7838,13 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
78397838
scales = scales.unsqueeze(-1)
78407839
assert len(blocks.shape) == 4
78417840
assert len(scales.shape) == 4
7842-
# convert to numpy
7843-
scales = scales.to_eager(scales).numpy()
7844-
blocks = blocks.to_eager(blocks)
7845-
blocks = self.transform_nibble_layout(blocks).numpy()
7846-
new_data = np.concatenate([scales, blocks], axis=-1)
7841+
blocks = self.transform_nibble_layout(blocks)
7842+
new_data = torch.concat((scales, blocks), dim=-1)
78477843
new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
78487844
logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
78497845
# flatten last dim
7850-
new_data = new_data.reshape(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
7846+
new_data = new_data.view(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
7847+
new_data = new_data.numpy()
78517848
self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
78527849

78537850
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:

0 commit comments

Comments (0)