@@ -56,6 +56,13 @@
     "quantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
+lib.define(
+    "quantize_per_tensor_asym32s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
+)
+lib.define(
+    "quantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
+)
+
 lib.define(
     "dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
 )
@@ -87,6 +94,13 @@
     "dequantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
 
+lib.define(
+    "dequantize_per_tensor_asym32s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
+)
+lib.define(
+    "dequantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
+)
+
 lib.define(
     "quantized_layer_norm(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
 )
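The new `asym32s` schemas mirror the existing asymmetric per-tensor variants such as `asym16u`, widening the quantized range to signed 32-bit. For orientation, the standard affine mapping these schemas describe is sketched below; the helper names and defaults are illustrative, not part of the patch, and the real kernels live in the Cadence backend.

```python
import torch

def quantize_per_tensor_asym32s_ref(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int = -(2**31),
    quant_max: int = 2**31 - 1,
    dtype: torch.dtype = torch.int32,
) -> torch.Tensor:
    # Standard affine quantization: q = clamp(round(x / scale) + zero_point, qmin, qmax).
    q = torch.round(input / scale) + zero_point
    return q.clamp(quant_min, quant_max).to(dtype)

def dequantize_per_tensor_asym32s_ref(
    input: torch.Tensor, scale: float, zero_point: int
) -> torch.Tensor:
    # Inverse mapping back to float: x ~= (q - zero_point) * scale.
    return (input.to(torch.float32) - zero_point) * scale
```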
@@ -641,6 +655,18 @@ def quantize_per_tensor_asym16u_meta(
     return input.new_empty(input.size(), dtype=dtype)
 
 
+@register_fake("cadence::quantize_per_tensor_asym32s")
+def quantize_per_tensor_asym32s_meta(
+    input: torch.Tensor,
+    scale: float,
+    zero_point: int,
+    quant_min: int,
+    quant_max: int,
+    dtype: torch.dtype,
+) -> torch.Tensor:
+    return input.new_empty(input.size(), dtype=dtype)
+
+
 @register_fake("cadence::dequantize_per_tensor")
 def dequantize_per_tensor_meta(
     input: torch.Tensor,
@@ -701,6 +727,18 @@ def dequantize_per_tensor_asym16u_meta(
     return input.new_empty(input.size(), dtype=torch.float)
 
 
+@register_fake("cadence::dequantize_per_tensor_asym32s")
+def dequantize_per_tensor_asym32s_meta(
+    input: torch.Tensor,
+    scale: float,
+    zero_point: int,
+    quant_min: int,
+    quant_max: int,
+    dtype: torch.dtype,
+) -> torch.Tensor:
+    return input.new_empty(input.size(), dtype=torch.float)
+
+
 @register_fake("cadence::quantized_add")
 def quantized_add_meta(
     X: torch.Tensor,
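Because the patch registers only schemas and fake (meta) kernels, the new ops can be smoke-tested for shape and dtype propagation under `FakeTensorMode`. A minimal sketch, assuming the registration module above has already been imported so the `cadence::*_asym32s` ops and their fake kernels exist:

```python
import torch
from torch._subclasses.fake_tensor import FakeTensorMode

# Assumes the ops-registration module above has been imported,
# so the cadence::*_asym32s schemas and fake kernels are in place.
with FakeTensorMode():
    x = torch.randn(4, 8)
    q = torch.ops.cadence.quantize_per_tensor_asym32s(
        x, 0.05, 0, -(2**31), 2**31 - 1, torch.int32
    )
    assert q.shape == x.shape and q.dtype == torch.int32  # from the fake kernel

    y = torch.ops.cadence.dequantize_per_tensor_asym32s(
        q, 0.05, 0, -(2**31), 2**31 - 1, torch.int32
    )
    assert y.shape == x.shape and y.dtype == torch.float32  # meta returns float
```

This mirrors how the exporter exercises these meta kernels during tracing: only output metadata is computed, and no quantization math runs.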