
Commit e33e301 (parent aafd06d)

Add the quantization methods.

File tree: 3 files changed (+42, -284 lines)

quantllm/model/model.py

Lines changed: 19 additions & 1 deletion

@@ -124,4 +124,22 @@ def get_tokenizer(self):
 
     def get_device(self):
         """Get the current device."""
-        return self._device
+        return self._device
+
+    def copy_model(self):
+        """Create a deep copy of the model with all parameters on CPU."""
+        # First, create a new instance with the same config but minimal settings
+        new_model = AutoModelForCausalLM.from_pretrained(
+            self.config.model_name,
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.float32,
+            device_map=None  # Disable device_map for copying
+        )
+
+        # Copy parameters from the current model
+        with torch.no_grad():
+            for name, param in self.model.named_parameters():
+                if name in new_model.state_dict():
+                    new_model.state_dict()[name].copy_(param.cpu())
+
+        return new_model
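
The same CPU-first cloning pattern can be reproduced standalone. A minimal sketch, assuming only the torch and transformers packages; the sshleifer/tiny-gpt2 checkpoint is a placeholder for illustration, not part of this commit:

import torch
from transformers import AutoModelForCausalLM

# Placeholder checkpoint; any causal-LM id works here.
CHECKPOINT = "sshleifer/tiny-gpt2"

src = AutoModelForCausalLM.from_pretrained(CHECKPOINT)

# Re-instantiate from the hub with device_map disabled, then overwrite weights.
clone = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
    device_map=None,
)

clone_state = clone.state_dict()  # fetch the mapping once, not per parameter
with torch.no_grad():
    for name, param in src.named_parameters():
        if name in clone_state:
            clone_state[name].copy_(param.cpu())

Fetching state_dict() once outside the loop avoids rebuilding the mapping on every iteration; when memory allows, copy.deepcopy(src) is the simpler alternative.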

quantllm/utils/benchmark.py

Lines changed: 23 additions & 9 deletions

@@ -49,6 +49,8 @@ def benchmark_quantizer(
         quantizer_args: Dict
     ) -> Dict[str, float]:
         """Benchmark a specific quantizer with memory management."""
+        from transformers import AutoModelForCausalLM
+
         results = {}
         try:
             self._clear_memory()

@@ -63,18 +65,29 @@ def benchmark_quantizer(
                 mem_efficient_args.update({
                     "percdamp": 0.01,
                     "block_size": 128,
-                })  # Create a deep copy of the model using from_pretrained
-            config = self.model.config
-            model_clone = type(self.model)(config)
-            # Copy weights manually to ensure proper copying
-            for param_name, param in self.model.state_dict().items():
-                if param_name in model_clone.state_dict():
-                    model_clone.state_dict()[param_name].copy_(param)
+                })
+
+            print(f"Creating copy of model for {name}...")
+            # Create a fresh model instance from pretrained
+            model_clone = AutoModelForCausalLM.from_pretrained(
+                self.model.config._name_or_path,
+                low_cpu_mem_usage=True,
+                torch_dtype=torch.float32,
+                device_map=None  # Important: disable device map for copying
+            )
+
+            print(f"Copying parameters for {name}...")
+            # Manually copy parameters; param_name avoids shadowing the
+            # quantizer name, which is checked again below
+            with torch.no_grad():
+                for param_name, param in self.model.named_parameters():
+                    if param_name in model_clone.state_dict():
+                        # Ensure the parameter is on CPU for copying
+                        model_clone.state_dict()[param_name].copy_(param.cpu())
 
-            # Initialize quantizer with model copy on CPU
+            # Initialize quantizer with model copy
            quantizer = quantizer_class(model=model_clone, **mem_efficient_args)
 
-            # Move to device for quantization
+            # Move to appropriate device
             if self.device == "cuda":
                 quantizer.model = quantizer.model.cuda()
                 cal_data = self.calibration_data.cuda()

@@ -84,6 +97,7 @@ def benchmark_quantizer(
             # Measure quantization time
             start_time = time.time()
 
+            print(f"Starting quantization for {name}...")
             if name == "AWQ":
                 # AWQ uses batched processing
                 cal_steps = min(20, len(cal_data))
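
The cleanup behind self._clear_memory() is not shown in this commit. A plausible minimal sketch of such a helper, an assumption rather than the repository's actual code:

import gc

import torch

def clear_memory() -> None:
    # Drop Python-side garbage first so CUDA tensors lose their last references.
    gc.collect()
    if torch.cuda.is_available():
        # Return cached blocks to the driver and wait for pending kernels.
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

Calling a helper like this between quantizer runs keeps one benchmark's allocations from skewing the next run's peak-memory numbers.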

quantllm/utils/benchmark_new.py

Lines changed: 0 additions & 274 deletions
This file was deleted.
