@@ -96,22 +96,7 @@ def test_end_to_end_asymmetric_quantization(strategy, group_size):
     )
     apply_quantization_config(model, quant_config)

-    for name, module in model.named_modules():
-        if isinstance(module, Linear):
-            weight = module.weight
-            if strategy == QuantizationStrategy.CHANNEL:
-                scale_shape = (weight.shape[0], 1)
-            else:
-                scale_shape = (weight.shape[0], weight.shape[1] // group_size)
-
-            module.weight_scale = torch.nn.Parameter(
-                torch.rand(scale_shape) * 0.1,
-                requires_grad=False
-            )
-            module.weight_zero_point = torch.nn.Parameter(
-                torch.randint(-8, 8, scale_shape, dtype=torch.int8),
-                requires_grad=False
-            )
+

     compressor = PackedQuantizationCompressor(config=quant_config)
     quantized_modules_to_scheme = {
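The block removed above hand-wired the `weight_scale` and `weight_zero_point` shapes per strategy; after this change the test relies on `apply_quantization_config` to create those parameters. As a reference for how the shapes fall out of the two strategies, here is a minimal standalone sketch (the helper name and the string strategy values are illustrative assumptions, not part of the test suite):

```python
import torch

def expected_qparam_shape(weight, strategy, group_size=None):
    # Per-channel: one scale / zero point per output row -> (out_features, 1).
    # Per-group: one per group of `group_size` input columns
    #            -> (out_features, in_features // group_size).
    out_features, in_features = weight.shape
    if strategy == "channel":
        return (out_features, 1)
    return (out_features, in_features // group_size)

w = torch.randn(128, 256)
assert expected_qparam_shape(w, "channel") == (128, 1)
assert expected_qparam_shape(w, "group", group_size=64) == (128, 4)
```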
@@ -168,34 +153,32 @@ def test_asymmetric_quantization_accuracy(num_bits):
     """
     with tempfile.TemporaryDirectory() as tmp_dir:
         tmp_path = Path(tmp_dir)
-
+
         shape = (256, 512)
-        weights = torch.randn(shape) + 2.0
-
+        biased_weights = torch.randn(shape) + 2.0
+
         quant_config = create_asymmetric_quant_config(
             num_bits=num_bits,
             strategy=QuantizationStrategy.GROUP,
-            group_size=128
+            group_size=128,
         )
-
-        group_size = 128
-        num_groups = shape[1] // group_size
-        scale_shape = (shape[0], num_groups)
-
-        scales = torch.rand(scale_shape) * 0.1
-        zero_points = torch.randint(-2 ** (num_bits - 1), 2 ** (num_bits - 1), scale_shape, dtype=torch.int8)
-
-        state_dict = {
-            "layer.weight": weights,
-            "layer.weight_scale": scales,
-            "layer.weight_zero_point": zero_points,
-        }
-
+
+        class SingleLayer(Module):
+            def __init__(self):
+                super().__init__()
+                self.layer = Linear(shape[1], shape[0], bias=False)
+
+        model = SingleLayer()
+        apply_quantization_config(model, quant_config)
+
+        with torch.no_grad():
+            model.layer.weight.copy_(biased_weights)
+
         compressor = PackedQuantizationCompressor(config=quant_config)
         quantized_modules_to_scheme = {"layer": quant_config.config_groups["group_1"]}
-
+
         compressed_state_dict = compressor.compress(
-            state_dict.copy(), names_to_scheme=quantized_modules_to_scheme
+            model.state_dict().copy(), names_to_scheme=quantized_modules_to_scheme
         )

         save_file(compressed_state_dict, tmp_path / "model.safetensors")
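For context on why the accuracy test seeds the layer with `torch.randn(shape) + 2.0`, here is a minimal standalone sketch of an asymmetric per-group quantization round-trip on such biased weights (the helper name, the epsilon clamp, and the default 4-bit range are assumptions for illustration; the library's own quantize/dequantize path is what the test actually exercises):

```python
import torch

def asym_quant_dequant_group(w, group_size=128, num_bits=4):
    """Asymmetric (zero-point) per-group quantization round-trip for a 2-D weight."""
    qmin, qmax = -(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1
    out_f, in_f = w.shape
    groups = w.reshape(out_f, in_f // group_size, group_size)
    w_min = groups.amin(dim=-1, keepdim=True)
    w_max = groups.amax(dim=-1, keepdim=True)
    # The zero point shifts the integer grid so a biased (mostly positive)
    # group still spans the full [qmin, qmax] range.
    scale = (w_max - w_min).clamp(min=1e-8) / (qmax - qmin)
    zero_point = torch.round(qmin - w_min / scale)
    q = torch.clamp(torch.round(groups / scale + zero_point), qmin, qmax)
    return ((q - zero_point) * scale).reshape(out_f, in_f)

w = torch.randn(256, 512) + 2.0  # biased weights, as in the test
err = (asym_quant_dequant_group(w) - w).abs().mean()
print(f"mean abs round-trip error: {err.item():.4f}")
```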