|
15 | 15 | dequantize_per_tensor,
|
16 | 16 | quantize_per_tensor,
|
17 | 17 | quantized_add,
|
| 18 | + quantized_layer_norm_per_tensor, |
18 | 19 | quantized_linear,
|
19 | 20 | )
|
20 | 21 | from executorch.backends.cadence.aot.typing_stubs import expand
|
@@ -240,3 +241,97 @@ def test_quantized_linear(
|
240 | 241 | torch.equal(output, expected_output),
|
241 | 242 | f"Values don't match: got {output}, expected {expected_output}",
|
242 | 243 | )
|
| 244 | + |
@expand(
    [
        # Case 1: int8 input with zero mean and zero zero-points.
        (
            torch.tensor([[-1, 1]], dtype=torch.int8),  # input: dequantized to [-0.1, 0.1]
            0.1,  # X_scale
            0,  # X_zero_point
            2,  # normalized_shape (last dimension)
            torch.tensor([1.0, 1.0]),  # weight
            torch.tensor([0.0, 0.0]),  # bias
            1e-5,  # eps
            0.1,  # output_scale
            0,  # output_zero_point
            torch.int8,  # dtype
            torch.tensor([[-10, 10]], dtype=torch.int8),  # expected_output
        ),
        # Case 2: uint8 input where both zero-points are offset to 128.
        (
            torch.tensor([[127, 129]], dtype=torch.uint8),  # input: dequantized to [-0.05, 0.05]
            0.05,  # X_scale
            128,  # X_zero_point
            2,  # normalized_shape (last dimension)
            torch.tensor([1.0, 1.0]),  # weight
            torch.tensor([0.0, 0.0]),  # bias
            1e-5,  # eps
            0.05,  # output_scale
            128,  # output_zero_point
            torch.uint8,  # dtype
            torch.tensor([[108, 148]], dtype=torch.uint8),  # expected_output
        ),
        # Case 3: non-trivial weight and bias applied after normalization.
        (
            torch.tensor([[-2, 2]], dtype=torch.int8),  # input: dequantized to [-0.2, 0.2]
            0.1,  # X_scale
            0,  # X_zero_point
            2,  # normalized_shape (last dimension)
            torch.tensor([2.0, 0.5]),  # weight: scale first element by 2, second by 0.5
            torch.tensor([0.1, -0.1]),  # bias: add 0.1 to first, subtract 0.1 from second
            1e-5,  # eps
            0.1,  # output_scale
            0,  # output_zero_point
            torch.int8,  # dtype
            torch.tensor([[-19, 4]], dtype=torch.int8),  # expected_output
        ),
    ]
)
def test_quantized_layer_norm_per_tensor(
    self,
    input_tensor: torch.Tensor,
    X_scale: float,
    X_zero_point: int,
    normalized_shape: int,
    weight: torch.Tensor,
    bias: torch.Tensor,
    eps: float,
    output_scale: float,
    output_zero_point: int,
    dtype: torch.dtype,
    expected_output: torch.Tensor,
) -> None:
    """Run quantized_layer_norm_per_tensor on one parameterized case.

    The result must have the expected ``dtype``, the same shape as
    ``input_tensor``, and be element-wise equal to ``expected_output``.
    """
    # Arguments are forwarded positionally, in the order the op is called with.
    op_args = (
        input_tensor,
        X_scale,
        X_zero_point,
        normalized_shape,
        weight,
        bias,
        eps,
        output_scale,
        output_zero_point,
    )
    output = quantized_layer_norm_per_tensor(*op_args)

    # Structural checks first: dtype, then shape.
    self.assertEqual(output.dtype, dtype, f"Output dtype should be {dtype}")
    self.assertEqual(
        output.shape, input_tensor.shape, "Output shape should match input shape"
    )

    # Exact value comparison against the expected quantized tensor.
    values_match = torch.equal(output, expected_output)
    self.assertTrue(
        values_match,
        f"Output values don't match expected. Got {output}, expected {expected_output}",
    )
0 commit comments