1 file changed, 4 insertions(+), 3 deletions(−)
```diff
@@ -1,6 +1,7 @@
 import gc
 import re
 from typing import List, Tuple
+import copy

 import torch
 import tqdm
```
```diff
@@ -47,7 +48,7 @@ def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:
         )
     else:
         min_val, max_val = tensor.aminmax()
-    amax = min_val.abs().max(max_val.abs())
+    amax = torch.maximum(min_val.abs(), max_val.abs())
     scale = finfo.max / amax.clamp(min=1e-12)
     # scale and clamp the tensor to bring it to
     # the representative range of float8 data type
```
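For reference, both the old and new expressions compute an elementwise maximum of two zero-dim tensors here; `torch.maximum` simply states that intent explicitly instead of leaning on the tensor-argument overload of `Tensor.max`. A minimal sketch of the resulting scale computation (assumes a PyTorch build that ships `torch.float8_e4m3fn`; `fp8_scale` is an illustrative name, not from the diff):

```python
import torch

def fp8_scale(tensor: torch.Tensor) -> torch.Tensor:
    finfo = torch.finfo(torch.float8_e4m3fn)
    # aminmax on the full tensor returns two 0-dim tensors.
    min_val, max_val = tensor.aminmax()
    # Absolute maximum of the value range, stated explicitly.
    amax = torch.maximum(min_val.abs(), max_val.abs())
    # Clamp guards against division by zero for all-zero tensors.
    return finfo.max / amax.clamp(min=1e-12)

scale = fp8_scale(torch.randn(4, 4))
```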
```diff
@@ -202,8 +203,8 @@ def quantize_weights(
             or name in quantize_config.ignored_layers
         ):
             continue
-        quant_weight, quant_scale = per_tensor_quantize(linear.weight.clone())
-        bias = linear.bias.clone() if linear.bias is not None else None
+        quant_weight, quant_scale = per_tensor_quantize(linear.weight)
+        bias = copy.deepcopy(linear.bias) if linear.bias is not None else None
         quant_linear = FP8DynamicLinear(quant_weight, quant_scale, bias)
         replace_module(model, name, quant_linear)
         del linear.weight
```
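A plausible reading of this change: the `.clone()` of the weight is unnecessary because `del linear.weight` frees the original float weight right after quantization, while `copy.deepcopy` gives the replacement layer a bias that shares no storage with the module being swapped out. A small sketch of that ownership difference (toy `nn.Linear`; the real code passes the copy into `FP8DynamicLinear`):

```python
import copy

import torch

lin = torch.nn.Linear(4, 4)

# deepcopy yields an independent Parameter: new storage, same values.
bias_copy = copy.deepcopy(lin.bias)
assert bias_copy.data_ptr() != lin.bias.data_ptr()
assert torch.equal(bias_copy, lin.bias)

# Passing lin.weight without .clone() hands the quantizer the live
# tensor; that is safe here only because per_tensor_quantize produces
# a new fp8 tensor and the float weight is deleted immediately after.
```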