|
30 | 30 | import sys |
31 | 31 |
|
32 | 32 | # Third Party |
33 | | -from torch.utils.tensorboard import SummaryWriter |
34 | 33 | from tqdm import tqdm |
35 | 34 | import numpy as np |
36 | 35 | import pandas as pd |
@@ -1449,49 +1448,57 @@ def ptq_mod_optim_lm(_model, m, layers, qcfg, optim_mode="both", **kwargs): |
1449 | 1448 | # show loss on pbar |
1450 | 1449 | pbar2.set_description(pbar_desc + f"{PTQloss:.6f}") |
1451 | 1450 |
|
1452 | | - if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput: |
1453 | | - scalars2log = {} |
1454 | | - hist2log = {} |
| 1451 | + if available_packages["tensorboard"]: |
| 1452 | + # Third Party |
| 1453 | + from torch.utils.tensorboard import SummaryWriter |
1455 | 1454 |
|
1456 | | - for k, v in loss4plot.items(): # plot loss |
1457 | | - scalars2log[f"{mod_name}/PTQloss_{k}"] = v |
1458 | | - for k, v in m.named_buffers(): # plot cv, delta, zp, alpha, and lr |
1459 | | - if any(kb in k for kb in ["delta", "zero_point", "clip_val"]): |
1460 | | - if len(v.shape) > 0 and v.shape[0] > 1: # perCh |
1461 | | - hist2log[f"{mod_name}/{k}"] = v |
1462 | | - else: |
1463 | | - scalars2log[f"{mod_name}/{k}"] = v |
1464 | | - for p, pname in zip( |
1465 | | - optim_a.param_groups[0]["params"], param_names[1] |
1466 | | - ): # cva |
1467 | | - scalars2log[f"{mod_name}/{pname}"] = p.item() |
1468 | | - scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0]["lr"] |
1469 | | - for p, pname in zip( |
1470 | | - optim_w.param_groups[0]["params"], param_names[0] |
1471 | | - ): # weights |
1472 | | - hist2log[f"{mod_name}/{pname}"] = p |
1473 | | - scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"] |
1474 | | - for p, pname in zip( |
1475 | | - optim_w.param_groups[1]["params"], param_names[2] |
1476 | | - ): # cvw |
1477 | | - if "alpha" in pname: |
1478 | | - hist2log[f"{mod_name}/{pname}"] = p |
1479 | | - else: |
| 1455 | + if isinstance(qcfg["tb_writer"], SummaryWriter) and isOutput: |
| 1456 | + scalars2log = {} |
| 1457 | + hist2log = {} |
| 1458 | + |
| 1459 | + for k, v in loss4plot.items(): # plot loss |
| 1460 | + scalars2log[f"{mod_name}/PTQloss_{k}"] = v |
| 1461 | + for k, v in m.named_buffers(): # plot cv, delta, zp, alpha, and lr |
| 1462 | + if any(kb in k for kb in ["delta", "zero_point", "clip_val"]): |
| 1463 | + if len(v.shape) > 0 and v.shape[0] > 1: # perCh |
| 1464 | + hist2log[f"{mod_name}/{k}"] = v |
| 1465 | + else: |
| 1466 | + scalars2log[f"{mod_name}/{k}"] = v |
| 1467 | + for p, pname in zip( |
| 1468 | + optim_a.param_groups[0]["params"], param_names[1] |
| 1469 | + ): # cva |
1480 | 1470 | scalars2log[f"{mod_name}/{pname}"] = p.item() |
1481 | | - scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1]["lr"] |
1482 | | - if "adaround" in qcfg["qw_mode"]: |
1483 | | - scalars2log[f"{mod_name}/AdaR_beta"] = ( |
1484 | | - loss_func.temp_decay.curr_beta |
1485 | | - ) |
1486 | | - for lidx, l in enumerate(layers): |
1487 | | - if not hasattr(l, "quantize_m1"): |
1488 | | - hist2log[f"{mod_name}/W{lidx}"] = l.weight |
| 1471 | + scalars2log[f"{mod_name}/LR_cv_a"] = optim_a.param_groups[0][ |
| 1472 | + "lr" |
| 1473 | + ] |
| 1474 | + for p, pname in zip( |
| 1475 | + optim_w.param_groups[0]["params"], param_names[0] |
| 1476 | + ): # weights |
| 1477 | + hist2log[f"{mod_name}/{pname}"] = p |
| 1478 | + scalars2log[f"{mod_name}/LR_w"] = optim_w.param_groups[0]["lr"] |
| 1479 | + for p, pname in zip( |
| 1480 | + optim_w.param_groups[1]["params"], param_names[2] |
| 1481 | + ): # cvw |
| 1482 | + if "alpha" in pname: |
| 1483 | + hist2log[f"{mod_name}/{pname}"] = p |
| 1484 | + else: |
| 1485 | + scalars2log[f"{mod_name}/{pname}"] = p.item() |
| 1486 | + scalars2log[f"{mod_name}/LR_cvw"] = optim_w.param_groups[1][ |
| 1487 | + "lr" |
| 1488 | + ] |
| 1489 | + if "adaround" in qcfg["qw_mode"]: |
| 1490 | + scalars2log[f"{mod_name}/AdaR_beta"] = ( |
| 1491 | + loss_func.temp_decay.curr_beta |
| 1492 | + ) |
| 1493 | + for lidx, l in enumerate(layers): |
| 1494 | + if not hasattr(l, "quantize_m1"): |
| 1495 | + hist2log[f"{mod_name}/W{lidx}"] = l.weight |
1489 | 1496 |
|
1490 | | - # write every in one shot will mess up the folder, better write them one by one |
1491 | | - for n, v in scalars2log.items(): |
1492 | | - qcfg["tb_writer"].add_scalar(n, v, Niter) |
1493 | | - for n, v in hist2log.items(): |
1494 | | - qcfg["tb_writer"].add_histogram(n, v, Niter) |
| 1497 | + # writing everything in one shot will mess up the folder; better to write them one by one |
| 1498 | + for n, v in scalars2log.items(): |
| 1499 | + qcfg["tb_writer"].add_scalar(n, v, Niter) |
| 1500 | + for n, v in hist2log.items(): |
| 1501 | + qcfg["tb_writer"].add_histogram(n, v, Niter) |
1495 | 1502 |
|
1496 | 1503 | for s in scheduler: |
1497 | 1504 | s.step() # we set up scheduler based on Nouterloop, not inner |
|
0 commit comments