28
28
"quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
29
29
)
30
30
31
# Typed variants of quantize_per_tensor, one functional + one out-variant
# schema per suffix. Registration order matches: asym8s, asym8u, asym16s,
# asym16u (suffix naming presumably encodes signed/unsigned bit width —
# TODO(review): confirm against the kernel implementations).
for _suffix in ("asym8s", "asym8u", "asym16s", "asym16u"):
    lib.define(
        f"quantize_per_tensor_{_suffix}(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
    )
    lib.define(
        f"quantize_per_tensor_{_suffix}.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
    )
31
59
# dequantize_per_tensor: functional schema plus its out-variant.
lib.define(
    "dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
    "dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)
65
# Typed variants of dequantize_per_tensor, mirroring the quantize variants:
# one functional + one out-variant schema per suffix, registered in the
# order asym8s, asym8u, asym16s, asym16u.
for _suffix in ("asym8s", "asym8u", "asym16s", "asym16u"):
    lib.define(
        f"dequantize_per_tensor_{_suffix}(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
    )
    lib.define(
        f"dequantize_per_tensor_{_suffix}.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
    )
37
89
38
90
lib .define (
39
91
"quantized_layer_norm(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
@@ -541,6 +593,54 @@ def quantize_per_tensor_meta(
541
593
return input .new_empty (input .size (), dtype = dtype )
542
594
543
595
596
@register_fake("cadence::quantize_per_tensor_asym8s")
def quantize_per_tensor_asym8s_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::quantize_per_tensor_asym8s.

    Propagates shape/dtype only: returns an uninitialized tensor with the
    same shape as ``input`` and the requested ``dtype``.
    """
    return input.new_empty(input.shape, dtype=dtype)
606
+
607
+
608
@register_fake("cadence::quantize_per_tensor_asym8u")
def quantize_per_tensor_asym8u_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::quantize_per_tensor_asym8u.

    Propagates shape/dtype only: returns an uninitialized tensor with the
    same shape as ``input`` and the requested ``dtype``.
    """
    return input.new_empty(input.shape, dtype=dtype)
618
+
619
+
620
@register_fake("cadence::quantize_per_tensor_asym16s")
def quantize_per_tensor_asym16s_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::quantize_per_tensor_asym16s.

    Propagates shape/dtype only: returns an uninitialized tensor with the
    same shape as ``input`` and the requested ``dtype``.
    """
    return input.new_empty(input.shape, dtype=dtype)
630
+
631
+
632
@register_fake("cadence::quantize_per_tensor_asym16u")
def quantize_per_tensor_asym16u_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::quantize_per_tensor_asym16u.

    Propagates shape/dtype only: returns an uninitialized tensor with the
    same shape as ``input`` and the requested ``dtype``.
    """
    return input.new_empty(input.shape, dtype=dtype)
642
+
643
+
544
644
@register_fake ("cadence::dequantize_per_tensor" )
545
645
def dequantize_per_tensor_meta (
546
646
input : torch .Tensor ,
@@ -553,6 +653,54 @@ def dequantize_per_tensor_meta(
553
653
return input .new_empty (input .size (), dtype = torch .float )
554
654
555
655
656
@register_fake("cadence::dequantize_per_tensor_asym8s")
def dequantize_per_tensor_asym8s_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::dequantize_per_tensor_asym8s.

    Propagates shape only: returns an uninitialized float32 tensor with
    the same shape as ``input`` (dequantized output is always float).
    """
    return input.new_empty(input.shape, dtype=torch.float)
666
+
667
+
668
@register_fake("cadence::dequantize_per_tensor_asym8u")
def dequantize_per_tensor_asym8u_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::dequantize_per_tensor_asym8u.

    Propagates shape only: returns an uninitialized float32 tensor with
    the same shape as ``input`` (dequantized output is always float).
    """
    return input.new_empty(input.shape, dtype=torch.float)
678
+
679
+
680
@register_fake("cadence::dequantize_per_tensor_asym16s")
def dequantize_per_tensor_asym16s_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::dequantize_per_tensor_asym16s.

    Propagates shape only: returns an uninitialized float32 tensor with
    the same shape as ``input`` (dequantized output is always float).
    """
    return input.new_empty(input.shape, dtype=torch.float)
690
+
691
+
692
@register_fake("cadence::dequantize_per_tensor_asym16u")
def dequantize_per_tensor_asym16u_meta(
    input: torch.Tensor,
    scale: float,
    zero_point: int,
    quant_min: int,
    quant_max: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Fake (meta) kernel for cadence::dequantize_per_tensor_asym16u.

    Propagates shape only: returns an uninitialized float32 tensor with
    the same shape as ``input`` (dequantized output is always float).
    """
    return input.new_empty(input.shape, dtype=torch.float)
702
+
703
+
556
704
@register_fake ("cadence::quantized_add" )
557
705
def quantized_add_meta (
558
706
X : torch .Tensor ,
0 commit comments