File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -407,6 +407,7 @@ def quantize_activations(
407
407
cleanup_memory ()
408
408
409
409
# Pass through calibration data to measure activation scales
410
+ << << << < HEAD
410
411
< << << << HEAD
411
412
with torch .inference_mode ():
412
413
with tqdm .tqdm (total = calibration_tokens .shape [0 ], desc = "Calibrating activation scales" ) as pbar :
@@ -415,14 +416,27 @@ def quantize_activations(
415
416
cleanup_memory ()
416
417
pbar .update (1 )
417
418
== == == =
419
+ == == == =
420
+ >> >> >> > 57 c31bb (Use `torch.inference_mode()` for lower memory usage during calibration (#20))
418
421
with tqdm .tqdm (
419
422
total = calibration_tokens .shape [0 ], desc = "Calibrating activation scales"
420
423
) as pbar :
421
424
for row_idx in range (calibration_tokens .shape [0 ]):
422
425
model (calibration_tokens [row_idx ].reshape (1 , - 1 ))
423
426
cleanup_memory ()
424
427
pbar .update (1 )
428
+ << < << << HEAD
425
429
>> > >> >> 3 ee9283 (Support calibrating kv cache scales )
430
+ == == == =
431
+ == == == =
432
+ with torch .inference_mode ():
433
+ with tqdm .tqdm (total = calibration_tokens .shape [0 ], desc = "Calibrating activation scales" ) as pbar :
434
+ for row_idx in range (calibration_tokens .shape [0 ]):
435
+ model (calibration_tokens [row_idx ].reshape (1 , - 1 ))
436
+ cleanup_memory ()
437
+ pbar .update (1 )
438
+ >> > >> >> b1c6ad6 (Use `torch.inference_mode()` for lower memory usage during calibration (#20))
439
+ >> >> >> > 57 c31bb (Use `torch.inference_mode()` for lower memory usage during calibration (#20))
426
440
427
441
# Replace dynamic quantizer observer with StaticLinear for export
428
442
for name , quantizer in model .named_modules ():
You can’t perform that action at this time.
0 commit comments