Skip to content

Commit d02b2ed

Browse files
authored
Revert "add rtn quant func (#1564)" (#1593)
1 parent 79fa1a9 commit d02b2ed

File tree

11 files changed

+51
-950
lines changed

11 files changed

+51
-950
lines changed

auto_round/auto_scheme/delta_loss.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
)
3434
from auto_round.calib_dataset import get_dataloader
3535
from auto_round.data_type.gguf import (
36-
quant_tensor_gguf_opt_rtn_asym_dq,
37-
quant_tensor_gguf_opt_rtn_sym_dq,
36+
quant_tensor_gguf_asym_dq,
37+
quant_tensor_gguf_sym_dq,
3838
search_gguf_scale_min_asym,
3939
search_gguf_scale_min_sym,
4040
)
@@ -260,7 +260,7 @@ def _init_scale(self):
260260
scale, wmin, d_scale, d_wmin = search_gguf_scale_min_asym(tensor, bits, scale_dtype, imatrix)
261261
tensor = revert_tensor_by_pad(tensor, orig_shape=orig_shape, pad_len=pad_len)
262262

263-
qdq_w, _, _ = quant_tensor_gguf_opt_rtn_asym_dq(
263+
qdq_w, _, _ = quant_tensor_gguf_asym_dq(
264264
tensor=tensor,
265265
bits=bits,
266266
scale_dtype=scale_dtype,
@@ -275,7 +275,7 @@ def _init_scale(self):
275275
tensor, orig_shape, pad_len = reshape_pad_tensor_by_group_size(tensor, group_size)
276276
scale, d_scale = search_gguf_scale_min_sym(tensor, bits, imatrix, scale_dtype, split_num=1)
277277
tensor = revert_tensor_by_pad(tensor, orig_shape=orig_shape, pad_len=pad_len)
278-
qdq_w, _, _ = quant_tensor_gguf_opt_rtn_sym_dq(
278+
qdq_w, _, _ = quant_tensor_gguf_sym_dq(
279279
tensor=tensor, bits=bits, scale_dtype=scale_dtype, imatrix=imatrix, scale=scale, d_scale=d_scale
280280
)
281281
else:

auto_round/compressors/base.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,7 +1264,6 @@ def _quantize_layer_via_rtn(self, name: str, dtype: torch.dtype = None, to_cpu=T
12641264
enable_round_tuning=False,
12651265
enable_torch_compile=self.enable_torch_compile,
12661266
disable_opt_rtn=disable_opt_rtn,
1267-
enable_rtn=self.iters == 0,
12681267
)
12691268
m = m.unwrapper({})
12701269
except torch.OutOfMemoryError:
@@ -1280,7 +1279,6 @@ def _quantize_layer_via_rtn(self, name: str, dtype: torch.dtype = None, to_cpu=T
12801279
enable_norm_bias_tuning=False,
12811280
enable_round_tuning=False,
12821281
enable_torch_compile=self.enable_torch_compile,
1283-
enable_rtn=self.iters == 0,
12841282
)
12851283
m = m.unwrapper({})
12861284
except Exception as e:
@@ -1944,7 +1942,6 @@ def _quantize_layers(self, layer_names: list, layer_inputs: dict) -> None:
19441942
enable_torch_compile=self.enable_torch_compile,
19451943
device=self.device,
19461944
disable_opt_rtn=self.disable_opt_rtn,
1947-
enable_rtn=self.iters == 0,
19481945
)
19491946
new_layer = wrapper_layer.unwrapper({})
19501947
set_module(self.model, layer_name, new_layer)
@@ -2713,7 +2710,6 @@ def _quantize_layer(
27132710
enable_minmax_tuning=self.enable_minmax_tuning,
27142711
enable_torch_compile=self.enable_torch_compile,
27152712
device=device,
2716-
enable_rtn=self.iters == 0,
27172713
).to(device)
27182714
round_params = []
27192715
minmax_params = []
@@ -3025,7 +3021,6 @@ def _quantize_block(
30253021
self.enable_norm_bias_tuning,
30263022
enable_torch_compile=self.enable_torch_compile,
30273023
device=device,
3028-
enable_rtn=self.iters == 0,
30293024
)
30303025
# Call this before quantization and after applying the block wrapper.
30313026
if is_nv_fp(self.data_type): # enable qkv and moe structure global_scale fuse.

0 commit comments

Comments (0)