File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Expand file tree Collapse file tree 1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -272,13 +272,22 @@ def quantize_activations(
272
272
cleanup_memory ()
273
273
274
274
# Pass through calibration data to measure activation scales
275
+ < << << << HEAD
275
276
with tqdm .tqdm (
276
277
total = calibration_tokens .shape [0 ], desc = "Calibrating activation scales"
277
278
) as pbar :
278
279
for row_idx in range (calibration_tokens .shape [0 ]):
279
280
model (calibration_tokens [row_idx ].reshape (1 , - 1 ))
280
281
cleanup_memory ()
281
282
pbar .update (1 )
283
+ == == == =
284
+ with torch .inference_mode ():
285
+ with tqdm .tqdm (total = calibration_tokens .shape [0 ], desc = "Calibrating activation scales" ) as pbar :
286
+ for row_idx in range (calibration_tokens .shape [0 ]):
287
+ model (calibration_tokens [row_idx ].reshape (1 , - 1 ))
288
+ cleanup_memory ()
289
+ pbar .update (1 )
290
+ >> >> >> > b1c6ad6 (Use `torch.inference_mode()` for lower memory usage during calibration (#20))
282
291
283
292
# Replace dynamic quantizer observer with StaticLinear for export
284
293
for name , quantizer in model .named_modules ():
You can’t perform that action at this time.
0 commit comments