1 file changed: +2 -4 lines changed
src/llmcompressor/observers Expand file tree Collapse file tree 1 file changed +2
-4
lines changed Original file line number Diff line number Diff line change 10
10
)
11
11
from compressed_tensors .quantization .utils import is_fp4
12
12
from compressed_tensors .registry .registry import RegistryMixin
13
- from compressed_tensors .utils import safe_permute
14
13
from loguru import logger
15
14
from torch import FloatTensor , IntTensor , Tensor
16
15
@@ -56,7 +55,7 @@ def forward(
56
55
# NOTE: this function updates running min/max values, which leads to
57
56
# running values updating twice
58
57
return self .get_gparam (observed = observed )
59
-
58
+
60
59
return self .get_qparams (
61
60
observed = observed ,
62
61
g_idx = g_idx ,
@@ -172,8 +171,7 @@ def get_qparams(
172
171
group_indices , group_sizes = torch .unique (g_idx , return_counts = True )
173
172
group_sizes = group_sizes [torch .argsort (group_indices )]
174
173
175
- perm = torch .argsort (g_idx )
176
- observed = safe_permute (observed , perm , dim = 1 )
174
+ observed = observed .index_select (g_idx , - 1 )
177
175
178
176
# TODO: experiment with vectorizing for loop for performance
179
177
end = 0
You can’t perform that action at this time.
0 commit comments