@@ -36,12 +36,11 @@ def test_quantize_per_tensor(
36
36
) -> None :
37
37
input_tensor = torch .tensor ([input_value ])
38
38
scale = (f_max - f_min ) / (q_max - q_min )
39
- inv_scale = 1.0 / scale
40
- zero_point = round (- f_min * inv_scale ) + q_min
39
+ zero_point = round (- f_min * 1 / scale ) + q_min
41
40
expected_output = torch .tensor ([expected_value ], dtype = target_dtype )
42
41
43
42
output = torch .ops .cadence .quantize_per_tensor (
44
- input_tensor , inv_scale , zero_point , q_min , q_max , target_dtype
43
+ input_tensor , scale , zero_point , q_min , q_max , target_dtype
45
44
)
46
45
47
46
self .assertEqual (
@@ -85,7 +84,7 @@ def test_dequantize_per_tensor(
85
84
expected_output = torch .tensor ([expected_value ], dtype = torch .float32 )
86
85
87
86
output = torch .ops .cadence .dequantize_per_tensor (
88
- input_tensor , scale , zero_point , q_min , q_max , torch . float32
87
+ input_tensor , scale , zero_point , q_min , q_max , input_tensor . dtype
89
88
)
90
89
91
90
self .assertEqual (
@@ -175,7 +174,7 @@ def test_quantized_add(
175
174
), # out_multiplier (0.5 * 2^31)
176
175
torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
177
176
0 , # out_zero_point
178
- torch .tensor ([[- 2 ]], dtype = dtype ), # expected_output
177
+ torch .tensor ([[0 ]], dtype = dtype ), # expected_output
179
178
per_tensor ,
180
179
False ,
181
180
False ,
@@ -200,14 +199,36 @@ def test_quantized_add(
200
199
), # out_multiplier (0.5 * 2^31)
201
200
torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
202
201
0 , # out_zero_point
203
- torch .tensor ([[- 10 , - 30 ]], dtype = dtype ), # expected_output
202
+ torch .tensor ([[- 2 , - 8 ]], dtype = dtype ), # expected_output
204
203
per_tensor ,
205
204
False ,
206
205
False ,
207
206
)
208
207
for (per_tensor , dtype ) in (
209
208
(False , torch .int8 ),
210
209
(True , torch .int8 ),
210
+ )
211
+ ],
212
+ * [
213
+ (
214
+ torch .Size ([1 , 3 ]), # src_shape: 1 sample, 3 input features
215
+ torch .Size (
216
+ [2 , 3 ]
217
+ ), # weight_shape: 2 output features, 3 input features
218
+ 0 , # in_zero_point
219
+ torch .tensor ([0 , 0 , 0 ], dtype = dtype ), # weight_zero_point
220
+ torch .tensor (
221
+ [1073741824 ], dtype = torch .int32
222
+ ), # out_multiplier (0.5 * 2^31)
223
+ torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
224
+ 0 , # out_zero_point
225
+ torch .tensor ([[0 , 0 ]], dtype = dtype ), # expected_output
226
+ per_tensor ,
227
+ False ,
228
+ False ,
229
+ )
230
+ for (per_tensor , dtype ) in (
231
+ (False , torch .uint8 ),
211
232
(True , torch .uint8 ),
212
233
)
213
234
],
@@ -226,7 +247,7 @@ def test_quantized_add(
226
247
torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
227
248
0 , # out_zero_point
228
249
torch .tensor (
229
- [[[- 2 , - 8 , - 14 ], [- 6 , - 28 , - 50 ]]], dtype = dtype
250
+ [[[0 , - 2 , - 4 ], [- 2 , - 7 , - 12 ]]], dtype = dtype
230
251
), # expected_output
231
252
per_tensor ,
232
253
False ,
@@ -235,7 +256,6 @@ def test_quantized_add(
235
256
for (per_tensor , dtype ) in (
236
257
(False , torch .int8 ),
237
258
(True , torch .int8 ),
238
- (True , torch .uint8 ),
239
259
)
240
260
],
241
261
# Test case 4: Non-zero zero points
@@ -252,15 +272,15 @@ def test_quantized_add(
252
272
), # out_multiplier (1.0 * 2^31)
253
273
torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
254
274
1 , # out_zero_point
255
- torch .tensor ([[- 15 , 25 ]], dtype = dtype ), # expected_output
275
+ torch .tensor ([[1 , 1 ]], dtype = dtype ), # expected_output
256
276
per_tensor ,
257
277
False ,
258
278
False ,
259
279
)
260
280
for (per_tensor , dtype ) in (
261
281
(False , torch .int8 ),
262
282
(True , torch .int8 ),
263
- (True , torch .uint8 ),
283
+ # (True, torch.uint8),
264
284
)
265
285
],
266
286
# Test case 5: Non-uniform weight zero points
@@ -277,12 +297,12 @@ def test_quantized_add(
277
297
), # out_multiplier (1.0 * 2^31)
278
298
torch .tensor ([0 ], dtype = torch .int64 ), # out_shift
279
299
1 , # out_zero_point
280
- torch .tensor ([[- 23 , 17 ]], dtype = dtype ), # expected_output
300
+ torch .tensor ([[1 , 1 ]], dtype = dtype ), # expected_output
281
301
False ,
282
302
False ,
283
303
False ,
284
304
)
285
- for dtype in (torch .int8 , torch . uint8 )
305
+ for dtype in (torch .int8 ,)
286
306
],
287
307
# Test case 6: Non-zero out_shift (shift=1)
288
308
* [
@@ -300,7 +320,7 @@ def test_quantized_add(
300
320
[1 ], dtype = torch .int64
301
321
), # out_shift (shift=1, doubles the scale)
302
322
1 , # out_zero_point
303
- torch .tensor ([[- 7 , 13 ]], dtype = dtype ), # expected_output
323
+ torch .tensor ([[1 , 2 ]], dtype = dtype ), # expected_output
304
324
per_tensor ,
305
325
False ,
306
326
False ,
@@ -322,13 +342,13 @@ def test_quantized_add(
322
342
[1 ], dtype = torch .int64
323
343
), # out_shift (shift=1, doubles the scale)
324
344
1 , # out_zero_point
325
- torch .tensor ([[- 7 , 17 ]], dtype = dtype ), # expected_output
345
+ torch .tensor ([[1 , 2 ]], dtype = dtype ), # expected_output
326
346
per_tensor ,
327
347
matmul ,
328
348
transposed_matmul ,
329
349
)
330
350
for (matmul , transposed_matmul ) in ((True , False ), (True , True ))
331
- for (per_tensor , dtype ) in ((True , torch .int8 ), ( True , torch . uint8 ) )
351
+ for (per_tensor , dtype ) in ((True , torch .int8 ),)
332
352
],
333
353
]
334
354
)
@@ -1045,7 +1065,20 @@ def test_quantized_conv_per_tensor(
1045
1065
[4 , 2 , 0 , - 2 ], dtype = dtype
1046
1066
), # expected: relu(1,3,5,7) = (1,3,5,7) * (-1.0) + 5 = (4,2,0,-2)
1047
1067
)
1048
- for dtype in [torch .int8 , torch .uint8 ]
1068
+ for dtype in [torch .int8 ]
1069
+ ],
1070
+ * [
1071
+ (
1072
+ "positive_with_shift_unsigned" ,
1073
+ torch .tensor ([2 , 4 , 6 , 8 ], dtype = dtype ), # input
1074
+ 1 , # X_zero_point
1075
+ 5 , # out_zero_point
1076
+ 1073741824 , # out_multiplier (0.5 * 2^31)
1077
+ 1 , # out_shift (multiply by 2^1 = 2)
1078
+ dtype , # dtype
1079
+ torch .tensor ([4 , 2 , 0 , 0 ], dtype = dtype ),
1080
+ )
1081
+ for dtype in [torch .uint8 ]
1049
1082
],
1050
1083
# Test case 4: Non-per-tensor
1051
1084
* [
0 commit comments