Skip to content

Commit 03c1fe9

Browse files
Add the quantization methods.
1 parent e33e301 commit 03c1fe9

File tree

2 files changed

+41
-14
lines changed

2 files changed

+41
-14
lines changed

quantllm/model/model.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,27 @@ def copy_model(self):
142142
if name in new_model.state_dict():
143143
new_model.state_dict()[name].copy_(param.cpu())
144144

145-
return new_model
145+
return new_model
146+
147+
def get_base_model(self):
    """Return a fresh CPU instance of the base model carrying current weights.

    Re-loads the architecture named by ``self.config.model_name`` with
    ``device_map=None`` (all weights on CPU, no accelerator sharding), then
    copies this model's parameters into it so the returned instance can be
    used for quantization/benchmarking without device-map side effects.

    Returns:
        A new ``AutoModelForCausalLM`` instance on CPU in float32 whose
        parameters mirror ``self.model``'s current values.

    Raises:
        Exception: any failure from loading or copying is logged and
        re-raised unchanged.
    """
    try:
        # Fresh instance with minimal settings; device_map=None is important
        # so every target tensor lives on CPU and copy_() below never has to
        # cross devices implicitly.
        model = AutoModelForCausalLM.from_pretrained(
            self.config.model_name,
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
            device_map=None,
        )
        # Hoist the state dict: nn.Module.state_dict() rebuilds the whole
        # name->tensor mapping on every call, so calling it inside the loop
        # (as the original did) made the copy quadratic in parameter count.
        target_state = model.state_dict()
        with torch.no_grad():
            for name, param in self.model.named_parameters():
                if name in target_state:
                    # detach() drops any autograd history; cpu() moves the
                    # data off any accelerator. copy_() handles the dtype
                    # cast into the float32 target. (Parameters are always
                    # tensors, so no hasattr("cpu") guard is needed.)
                    target_state[name].copy_(param.detach().cpu())
        return model
    except Exception as e:
        logging.error(f"Error creating base model: {str(e)}")
        raise

quantllm/utils/benchmark.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def benchmark_quantizer(
5454
results = {}
5555
try:
5656
self._clear_memory()
57+
print(f"GPU memory before {name}: {torch.cuda.memory_allocated() / 1024**2:.1f}MB")
5758

5859
# Configure quantizer for memory efficiency
5960
mem_efficient_args = dict(quantizer_args)
@@ -66,23 +67,26 @@ def benchmark_quantizer(
6667
"percdamp": 0.01,
6768
"block_size": 128,
6869
})
70+
print(f"Creating model copy for {name}...")
71+
# Create a fresh model instance with same config
72+
config_dict = self.model.config.to_dict()
73+
config_dict.pop('_name_or_path', None) # Remove path to ensure clean config
6974

70-
print(f"Creating copy of model for {name}...")
71-
# Create a fresh model instance from pretrained
72-
model_clone = AutoModelForCausalLM.from_pretrained(
73-
self.model.config._name_or_path,
74-
low_cpu_mem_usage=True,
75-
torch_dtype=torch.float32,
76-
device_map=None # Important: disable device map for copying
77-
)
75+
model_clone = type(self.model)(self.model.config)
7876

7977
print(f"Copying parameters for {name}...")
80-
# Manually copy parameters to ensure proper copying
78+
# Copy parameters with proper CPU offloading
8179
with torch.no_grad():
82-
for name, param in self.model.named_parameters():
83-
if name in model_clone.state_dict():
84-
# Ensure parameter is on CPU for copying
85-
model_clone.state_dict()[name].copy_(param.cpu())
80+
state_dict = {}
81+
for param_name, param in self.model.state_dict().items():
82+
# Handle device placement during copy
83+
param_data = param.detach()
84+
if param_data.device.type != 'cpu':
85+
param_data = param_data.cpu()
86+
state_dict[param_name] = param_data
87+
88+
# Load state dict all at once
89+
model_clone.load_state_dict(state_dict)
8690

8791
# Initialize quantizer with model copy
8892
quantizer = quantizer_class(model=model_clone, **mem_efficient_args)

0 commit comments

Comments
 (0)