Commit 54e44e6

Merge pull request #7 from codewithdark-git/fix/awq-quantized-linear-device-issue
Fix: Unify nn.Module device placement across all quantizers and base …
2 parents 676244a + 33e21ba commit 54e44e6
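For context, here is a minimal sketch (not from the repository) of the behavioral difference that motivates unifying on `nn.Module.to()`: a tensor's `.to()` is out-of-place and returns a new tensor, while a module's `.to()` moves its parameters and buffers in place and returns the module itself, so `layer = layer.to(target_device)` is the idiomatic way to relocate a whole `nn.Linear`. What the replaced `move_to_device` helper did internally is an assumption here; only its replacement is shown in the diff.

# Illustrative sketch only; names below are hypothetical, not from quantllm.
import torch
import torch.nn as nn

target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tensors: .to() returns a new tensor; the original is left where it was.
w = torch.randn(4, 4)
w_moved = w.to(target_device)

# Modules: .to() moves parameters and buffers in place and returns self,
# so reassignment keeps the same layer object, now on the new device.
layer = nn.Linear(4, 4)
layer = layer.to(target_device)
assert layer.weight.device.type == target_device.type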

3 files changed: +3 / -3 lines


quantllm/quant/awq.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def _quantize_layer(
         quantized = quantized.to(target_device)
 
         # Ensure layer parameters are on the target_device for computation
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
 
         # Copy bias if exists, ensuring it's on the target device
         if layer.bias is not None:

quantllm/quant/gguf.py

Lines changed: 1 addition & 1 deletion
@@ -203,7 +203,7 @@ def _quantize_layer(
         """Quantize a single layer to GGUF format with memory-efficient processing."""
         target_device = torch.device('cpu') if self.cpu_offload else self.device_manager.primary_device
 
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
 
         # Initialize quantized layer and move to target_device
         quantized = QuantizedLinear(

quantllm/quant/gptq.py

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ def _quantize_layer(self, layer: nn.Linear, H: torch.Tensor) -> QuantizedLinear:
         # Ensure tensors are on the correct device
         H = move_to_device(H, target_device)
         # Original layer's weights should be moved to target_device before processing
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
         W = layer.weight.data  # W is now on target_device
 
         # Initialize quantized layer

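Taken together, the three hunks leave each quantizer's `_quantize_layer` with the same device-placement shape. A hedged skeleton of that shared pattern follows; the function signature, parameter names, and the use of `nn.Linear` as a stand-in for `QuantizedLinear` are assumptions for illustration, not the library's exact API.

# Hypothetical skeleton of the shared pattern; not the actual quantllm code.
import torch
import torch.nn as nn

def _quantize_layer_sketch(layer: nn.Linear, cpu_offload: bool, primary_device: torch.device) -> nn.Linear:
    # Pick CPU when offloading, otherwise the primary compute device.
    target_device = torch.device("cpu") if cpu_offload else primary_device

    # Unified module placement: nn.Module.to() moves weight and bias together.
    layer = layer.to(target_device)
    W = layer.weight.data  # now guaranteed to live on target_device

    # ... quantize W on target_device, then build the output layer there ...
    quantized = nn.Linear(layer.in_features, layer.out_features, bias=layer.bias is not None)
    quantized = quantized.to(target_device)

    # Copy bias if it exists; both sides are already on target_device.
    if layer.bias is not None:
        quantized.bias.data.copy_(layer.bias.data)
    return quantized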