
Commit ffea17e

Merge pull request #14 from neuralmagic/more-aggressive-cleanup
Perform more aggressive cleanup during weight quantization and add tqdm
2 parents fc895fd + 5e876d2

1 file changed

auto_fp8/quantize.py

Lines changed: 7 additions & 5 deletions
@@ -195,7 +195,8 @@ def quantize_weights(
     quantize_config: BaseQuantizeConfig,
     ignored_layers: List[str] = [],
 ):
-    for name, linear in model.named_modules():
+    named_modules = list(model.named_modules())
+    for name, linear in tqdm.tqdm(named_modules, desc="Quantizing weights"):
         if (
             not isinstance(linear, torch.nn.Linear)
             or name in quantize_config.ignored_layers
@@ -205,7 +206,7 @@ def quantize_weights(
         quant_linear = FP8DynamicLinear(quant_weight, quant_scale, linear.bias)
         replace_module(model, name, quant_linear)
         del linear
-    cleanup_memory()
+        cleanup_memory()
 
 
 def quantize_activations(
@@ -214,6 +215,7 @@ def quantize_activations(
     calibration_tokens,
     ignored_layers: List[str] = [],
 ):
+    # Replace weight quantizer with a dynamic activation quantizer observer
     for name, dynamic_quant_linear in model.named_modules():
         if (
             not isinstance(dynamic_quant_linear, FP8DynamicLinear)
@@ -229,14 +231,14 @@ def quantize_activations(
         del dynamic_quant_linear
     cleanup_memory()
 
-    # Calibration.
-    with tqdm.tqdm(total=calibration_tokens.shape[0], desc="Calibrating") as pbar:
+    # Pass through calibration data to measure activation scales
+    with tqdm.tqdm(total=calibration_tokens.shape[0], desc="Calibrating activation scales") as pbar:
         for row_idx in range(calibration_tokens.shape[0]):
             model(calibration_tokens[row_idx].reshape(1, -1))
             cleanup_memory()
             pbar.update(1)
 
-    # Replace dynamic quantizer with StaticLinear for export
+    # Replace dynamic quantizer observer with StaticLinear for export
     for name, quantizer in model.named_modules():
         if (
             not isinstance(quantizer, FP8StaticLinearQuantizer)
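
For reference, the quant_weight / quant_scale pair consumed by FP8DynamicLinear comes from a helper that is not part of this diff. A minimal per-tensor FP8 sketch, assuming an E4M3 target and symmetric scaling (a simplified stand-in, not the repository's actual helper):

import torch

def per_tensor_quantize_sketch(weight: torch.Tensor):
    # One scale for the whole tensor: map the largest |value| onto the
    # FP8 E4M3 maximum, then cast the scaled weights down to 8-bit floats.
    finfo = torch.finfo(torch.float8_e4m3fn)
    scale = weight.abs().max().clamp(min=1e-12) / finfo.max
    qweight = (weight / scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
    return qweight, scale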

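cleanup_memory() is defined elsewhere in the file; a plausible minimal version, inferred from the name and its use after layer replacement (an assumption, not code from this commit):

import gc
import torch

def cleanup_memory():
    # Collect Python garbage first so dropped modules release their tensors,
    # then return cached CUDA blocks to the driver.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

Running it once per replaced layer, as the weight-quantization loop now appears to do, keeps peak memory lower during the window when both the original and quantized weights are alive, at the cost of per-layer overhead; the new tqdm progress bar makes that slower loop visible.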
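
The calibration flow the new comments describe (observe activation scales during forward passes, then swap in a static module for export) follows the usual observer pattern. A generic sketch with a hypothetical ActScaleObserver standing in for FP8StaticLinearQuantizer, whose internals are outside this diff:

import torch

class ActScaleObserver(torch.nn.Module):
    # Hypothetical stand-in for FP8StaticLinearQuantizer: wraps a layer
    # and records the largest absolute input activation it has seen.
    def __init__(self, linear: torch.nn.Module):
        super().__init__()
        self.linear = linear
        self.register_buffer("act_max", torch.zeros(()))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Running max over all calibration rows; a static activation scale
        # is derived from act_max when the observer is replaced for export.
        self.act_max = torch.maximum(self.act_max, x.detach().abs().max())
        return self.linear(x)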