3 files changed: +17 −12 lines changed. File tree: modifiers/pruning/sparsegpt.
File 1 of 3 (QuantizedKVParameterCache tests):

@@ -47,8 +47,8 @@ def test_is_quantized_cache_singleton():


 def test_update():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
@@ -62,7 +62,7 @@ def test_update():
     layer_idx = 0

     cache.update(key_states, value_states, layer_idx)
-    denom = (2 ** (nbits) - 1) / 2
+    denom = (2 ** (num_bits) - 1) / 2
     expected_k_scale = torch.tensor([max_key_states_val / denom])
     expected_v_scale = torch.tensor([max_value_states_val / denom])

@@ -83,8 +83,8 @@ def test_update():


 def test_cache_reset():
-    nbits = 8
-    args = QuantizationArgs(nbits=nbits, symmetric=True)
+    num_bits = 8
+    args = QuantizationArgs(num_bits=num_bits, symmetric=True)
     cache = QuantizedKVParameterCache(args)

     max_key_states_val = 1.0
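For reference, a minimal sketch of the symmetric scale math these tests assert, assuming the cache scales by the observed max absolute value (the helper name is illustrative, not part of the library):

import torch

def symmetric_scale(max_abs_val: float, num_bits: int) -> torch.Tensor:
    # Symmetric quantization splits the 2**num_bits - 1 representable
    # steps evenly around zero, giving the (2**num_bits - 1) / 2
    # denominator used as `denom` in the test above.
    denom = (2 ** num_bits - 1) / 2
    return torch.tensor([max_abs_val / denom])

# With num_bits=8 and a max observed value of 1.0, the scale is
# 1.0 / 127.5 ≈ 0.00784, matching expected_k_scale and expected_v_scale.
print(symmetric_scale(1.0, 8))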
File 2 of 3 (quantization recipe fixture in setUp):

@@ -96,13 +96,11 @@ def setUp(self):
                 "symmetric": False,
                 "strategy": "token",
                 "dynamic": True,
-                "kwargs": {},
             },
             "weights": {
                 "num_bits": 4,
                 "symmetric": True,
                 "strategy": "channel",
-                "kwargs": {},
             },
         }
     }
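As a usage note, the fixture's recipe dicts after this change look like the sketch below; the "input_activations" key, its "num_bits" value, and the enclosing structure are assumptions inferred from the hunk context, since the diff only shows the tail of the first dict.

# Hypothetical recipe dict mirroring the fixture after this change.
quantization_config = {
    "input_activations": {  # assumed key; only this dict's tail is visible in the hunk
        "num_bits": 8,      # assumed value, not shown in the hunk
        "symmetric": False,
        "strategy": "token",
        "dynamic": True,
        # the empty "kwargs": {} entry is simply dropped
    },
    "weights": {
        "num_bits": 4,
        "symmetric": True,
        "strategy": "channel",
    },
}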
File 3 of 3 (ModuleSparsificationInfo tests):

@@ -1,6 +1,11 @@
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from torch.nn import Linear, Module, ReLU

 from llmcompressor.pytorch.utils import ModuleSparsificationInfo
@@ -16,10 +21,12 @@ def __init__(self):
         self.fc1.quantization_scheme = QuantizationScheme(
             targets=["model.fc1"],
             weights=QuantizationArgs(
-                precision=8,
-                granularity="per_tensor",
-                algorithm="gptq",
-                blocksize=128,
+                num_bits=8,
+                type=QuantizationType.INT,
+                group_size=128,
+                strategy=QuantizationStrategy.GROUP,
+                symmetric=True,
+                dynamic=False,
             ),
         )
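For illustration, a self-contained sketch of the updated scheme construction from this hunk; the Linear layer shape is an assumption, everything else mirrors the diff.

from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationScheme,
    QuantizationStrategy,
    QuantizationType,
)
from torch.nn import Linear

# Same arguments the test now passes: 8-bit integer weights, grouped
# quantization with group_size=128, symmetric and static (non-dynamic).
scheme = QuantizationScheme(
    targets=["model.fc1"],
    weights=QuantizationArgs(
        num_bits=8,
        type=QuantizationType.INT,
        group_size=128,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        dynamic=False,
    ),
)

fc1 = Linear(128, 128)  # assumed in/out features for illustration
fc1.quantization_scheme = scheme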