Commit ed7b7c7

fix lazy loading
1 parent ab0b550 commit ed7b7c7

File tree

1 file changed

+2
-0
lines changed


convert_hf_to_gguf.py

Lines changed: 2 additions & 0 deletions
@@ -352,6 +352,8 @@ def dequant_compressed_tensor(weight: Tensor, scale: Tensor) -> Tensor:
             device=weight.device,
             dtype=torch.int32,
         )
+        if self.lazy:
+            unpacked = LazyTorchTensor.from_eager(unpacked)
         for i in range(pack_factor):
             unpacked[:, i::pack_factor] = (weight >> (num_bits * i)) & mask
         # TODO: may need to unpad
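For context, the loop in this hunk bit-unpacks quantized values that were packed several-per-int32: shift the packed word right by `num_bits * i` and mask off the low bits to recover the i-th sub-value of each word. Below is a minimal standalone sketch of that pattern, assuming NumPy in place of torch; the `packed` value, `num_bits = 4`, and the resulting `pack_factor = 8` are illustrative assumptions, not taken from the file.

```python
import numpy as np

num_bits = 4
pack_factor = 32 // num_bits      # 8 sub-values per int32 word (assumed)
mask = (1 << num_bits) - 1        # 0xF: selects one 4-bit sub-value

# Hypothetical packed weight: one int32 holding the eight nibbles 0..7,
# least-significant nibble first.
packed = np.array([[0x76543210]], dtype=np.int32)

rows, cols = packed.shape
unpacked = np.zeros((rows, cols * pack_factor), dtype=np.int32)
for i in range(pack_factor):
    # Sub-value i of every word lands in every pack_factor-th column,
    # mirroring the strided assignment in the diff above.
    unpacked[:, i::pack_factor] = (packed >> (num_bits * i)) & mask

print(unpacked)  # [[0 1 2 3 4 5 6 7]]
```

In the converter, `unpacked` is preallocated eagerly with `torch.zeros`; the fix in this commit wraps it back into the lazy-tensor graph (via `LazyTorchTensor.from_eager`) before the strided in-place writes, so that lazy conversion runs see the assignments.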
