Skip to content

Commit 9f7d2f4

Browse files
Add the quantization methods.
1 parent f630e89 commit 9f7d2f4

File tree

4 files changed

+17
-19
lines changed

4 files changed

+17
-19
lines changed

quantllm/quant/awq.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ def quantize(
7575
self._process_activation_stats()
7676

7777
# Quantize the model layer by layer
78-
for name, module in self.model.named_modules():
78+
for name, module in self.model.named_modules():
7979
if isinstance(module, nn.Linear):
80-
self.logger.info(f"Processing layer: {name}")
80+
self.logger.log_info(f"Processing layer: {name}")
8181

8282
# Get activation scale for this layer
8383
act_scale = self.act_scales.get(name)

quantllm/quant/gguf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ def __init__(
3939
self.use_packed = use_packed
4040
self.legacy_format = legacy_format
4141
self.batch_size = batch_size
42-
4342
def quantize(
4443
self,
4544
calibration_data: Optional[torch.Tensor] = None
@@ -58,7 +57,7 @@ def quantize(
5857
# Convert linear layers to quantized versions
5958
for name, module in self.model.named_modules():
6059
if isinstance(module, nn.Linear):
61-
self.logger.info(f"Processing layer: {name}")
60+
self.logger.log_info(f"Processing layer: {name}")
6261

6362
# Create quantized layer
6463
layer_stats = stats.get(name, None)

quantllm/quant/gptq.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ def quantize(self, calibration_data: Optional[torch.Tensor] = None) -> PreTraine
6161
self.model.eval()
6262

6363
# Process layers
64-
for name, module in self.model.named_modules():
64+
for name, module in self.model.named_modules():
6565
if isinstance(module, nn.Linear):
66-
self.logger.info(f"Processing layer: {name}")
66+
self.logger.log_info(f"Processing layer: {name}")
6767

6868
# Compute Hessian approximation
6969
self.H[name] = self._compute_hessian(module, calibration_data)

quantllm/quant/quantization_engine.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,8 @@ def quantize_model(
202202
self._quantize_layers(model, stats)
203203

204204
return model
205-
206205
except Exception as e:
207-
self.logger.error(f"Error during quantization: {str(e)}")
206+
self.logger.log_error(f"Error during quantization: {str(e)}")
208207
raise
209208

210209
def _prepare_model(self, model: PreTrainedModel) -> PreTrainedModel:
@@ -375,11 +374,11 @@ def export_model(
375374
model.optimize()
376375
onnx.save(model, path)
377376

378-
else:
377+
else:
379378
raise ValueError(f"Unsupported export format: {format}")
380379

381380
except Exception as e:
382-
self.logger.error(f"Error exporting model: {str(e)}")
381+
self.logger.log_error(f"Error exporting model: {str(e)}")
383382
raise
384383

385384
def benchmark(
@@ -441,8 +440,8 @@ def benchmark(
441440
"p99_latency": torch.quantile(latencies, 0.99).item()
442441
}
443442

444-
except Exception as e:
445-
self.logger.error(f"Error during benchmarking: {str(e)}")
443+
except Exception as e:
444+
self.logger.log_error(f"Error during benchmarking: {str(e)}")
446445
raise
447446

448447
class BaseQuantizer:
@@ -560,15 +559,15 @@ def _prepare_model(self, original_model: PreTrainedModel):
560559
from transformers import AutoModelForCausalLM
561560

562561
try:
563-
# Create new model instance
564-
self.logger.info("Creating new model instance...")
562+
# Create new model instance
563+
self.logger.log_info("Creating new model instance...")
565564
new_model = AutoModelForCausalLM.from_config(
566565
self.model_config,
567566
trust_remote_code=True
568567
)
569568

570-
# Copy state dict with proper device handling
571-
self.logger.info("Copying model parameters...")
569+
# Copy state dict with proper device handling
570+
self.logger.log_info("Copying model parameters...")
572571
with torch.no_grad():
573572
state_dict = {}
574573
for name, param in original_model.state_dict().items():
@@ -586,10 +585,10 @@ def _prepare_model(self, original_model: PreTrainedModel):
586585
new_model = new_model.to(self.device_manager.primary_device)
587586

588587
self._model = new_model
589-
self.logger.info("Model preparation completed successfully")
588+
self.logger.log_info("Model preparation completed successfully")
590589

591-
except Exception as e:
592-
self.logger.error(f"Failed to prepare model: {str(e)}")
590+
except Exception as e:
591+
self.logger.log_error(f"Failed to prepare model: {str(e)}")
593592
raise
594593

595594
def prepare_calibration_data(self, calibration_data: torch.Tensor) -> torch.Tensor:

0 commit comments

Comments (0)