|
569 | 569 | (Clmul) |
570 | 570 | (Clmulh) |
571 | 571 | (Clmulr) |
| 572 | + |
| 573 | + ;; Zbkb: Bit-manipulation for Cryptography |
| 574 | + (Pack) |
| 575 | + (Packw) |
| 576 | + (Packh) |
572 | 577 | )) |
573 | 578 |
|
574 | 579 |
|
|
858 | 863 | (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) |
859 | 864 | dst)) |
860 | 865 |
|
861 | | -;; extend int if need. |
862 | | -(decl ext_int_if_need (bool ValueRegs Type) ValueRegs) |
863 | | -;;; for I8, I16, and I32 ... |
864 | | -(rule -1 |
865 | | - (ext_int_if_need signed val ty) |
866 | | - (gen_extend val signed (ty_bits ty) 64)) |
867 | | -;;; otherwise this is a I64 or I128 |
868 | | -;;; no need to extend. |
869 | | -(rule |
870 | | - (ext_int_if_need _ r $I64) |
871 | | - r) |
872 | | -(rule |
873 | | - (ext_int_if_need _ r $I128) |
874 | | - r) |
875 | | - |
876 | | - |
877 | 866 | ;; Helper for getting the negative of an Imm12 |
878 | 867 | (decl neg_imm12 (Imm12) Imm12) |
879 | 868 | (extern constructor neg_imm12 neg_imm12) |
|
1031 | 1020 | ;; add low and high together. |
1032 | 1021 | (result Reg (alu_add high low))) |
1033 | 1022 | (value_regs result (load_u64_constant 0)))) |
| 1023 | + |
| 1024 | +;; Extends an integer to 64 bits if it is narrower than that; the bool selects sign ($true) or zero ($false) extension. |
| 1025 | +(decl ext_int_if_need (bool ValueRegs Type) ValueRegs) |
| 1026 | +;;; For integer types matched by `fits_in_32`, extend to $I64: `sext` when the flag is $true, `zext` when it is $false. |
| 1027 | +(rule 0 (ext_int_if_need $true val (fits_in_32 (ty_int ty))) |
| 1028 | + (sext val ty $I64)) |
| 1029 | +(rule 0 (ext_int_if_need $false val (fits_in_32 (ty_int ty))) |
| 1030 | + (zext val ty $I64)) |
| 1031 | +;; $I64 and $I128 values are already at least 64 bits wide, so they are returned unchanged. |
| 1032 | +(rule 1 (ext_int_if_need _ r $I64) r) |
| 1033 | +(rule 2 (ext_int_if_need _ r $I128) r) |
1034 | 1034 |
|
1035 | | -(decl gen_extend (Reg bool u8 u8) Reg) |
1036 | | -(rule |
1037 | | - (gen_extend r is_signed from_bits to_bits) |
1038 | | - (let |
1039 | | - ((tmp WritableReg (temp_writable_reg $I16)) |
1040 | | - (_ Unit (emit (MInst.Extend tmp r is_signed from_bits to_bits)))) |
1041 | | - tmp)) |
1042 | 1035 |
|
1043 | | -;; val is_signed from_bits to_bits |
1044 | | -(decl lower_extend (Reg bool u8 u8) ValueRegs) |
1045 | | -(rule -1 |
1046 | | - (lower_extend r is_signed from_bits to_bits) |
1047 | | - (gen_extend r is_signed from_bits to_bits)) |
| 1036 | +;; Zero-extends `val` from `from_ty` to `to_ty`; shorthand for `extend` with `ExtendOp.Zero`. |
| 1037 | +(decl zext (ValueRegs Type Type) ValueRegs) |
| 1038 | +(rule (zext val from_ty to_ty) (extend val (ExtendOp.Zero) from_ty to_ty)) |
1048 | 1039 |
|
1049 | | -;;;; for I128 signed extend. |
1050 | | -(rule 1 |
1051 | | - (lower_extend r $true 64 128) |
1052 | | - (let |
1053 | | - ((tmp Reg (alu_rrr (AluOPRRR.Slt) r (zero_reg))) |
1054 | | - (high Reg (gen_extend tmp $true 1 64))) |
1055 | | - (value_regs (gen_move2 r $I64 $I64) high))) |
| 1040 | +;; Sign-extends `val` from `from_ty` to `to_ty`; shorthand for `extend` with `ExtendOp.Signed`. |
| 1041 | +(decl sext (ValueRegs Type Type) ValueRegs) |
| 1042 | +(rule (sext val from_ty to_ty) (extend val (ExtendOp.Signed) from_ty to_ty)) |
1056 | 1043 |
|
1057 | | -(rule |
1058 | | - (lower_extend r $true from_bits 128) |
1059 | | - (let |
1060 | | - ((tmp Reg (gen_extend r $true from_bits 64)) |
1061 | | - (tmp2 Reg (alu_rrr (AluOPRRR.Slt) tmp (zero_reg))) |
1062 | | - (high Reg (gen_extend tmp2 $true 1 64))) |
1063 | | - (value_regs (gen_move2 tmp $I64 $I64) high))) |
| 1044 | +(type ExtendOp |
| 1045 | + (enum |
| 1046 | + (Zero) |
| 1047 | + (Signed))) |
| 1048 | + |
| 1049 | +;; Performs either a sign or zero extension of the given value |
| 1050 | +(decl extend (ValueRegs ExtendOp Type Type) ValueRegs) |
| 1051 | + |
| 1052 | +;;; Generic Rules Extending to I64 |
| 1053 | +(decl pure extend_shift_op (ExtendOp) AluOPRRI) |
| 1054 | +(rule (extend_shift_op (ExtendOp.Zero)) (AluOPRRI.Srli)) |
| 1055 | +(rule (extend_shift_op (ExtendOp.Signed)) (AluOPRRI.Srai)) |
| 1056 | + |
| 1057 | +;; In the most generic case, we shift left by (64 - from_bits) and then shift right by the same amount. |
| 1058 | +;; The right shift comes from `extend_shift_op`: `srli` for zero extension, `srai` for sign extension. |
| 1059 | +(rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty)) |
| 1060 | + (let ((val Reg (value_regs_get val 0)) |
| 1061 | + (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty)))) |
| 1062 | + (left Reg (alu_rr_imm12 (AluOPRRI.Slli) val shift)) |
| 1063 | + (shift_op AluOPRRI (extend_shift_op extend_op)) |
| 1064 | + (right Reg (alu_rr_imm12 shift_op left shift))) |
| 1065 | + right)) |
| 1066 | + |
| 1067 | +;; If we are zero extending an 8-bit value we can use a single `andi` instruction with the mask 255. |
| 1068 | +(rule 1 (extend val (ExtendOp.Zero) $I8 (fits_in_64 to_ty)) |
| 1069 | + (let ((val Reg (value_regs_get val 0))) |
| 1070 | + (alu_rr_imm12 (AluOPRRI.Andi) val (imm12_const 255)))) |
| 1071 | + |
| 1072 | +;; When sign extending from 32 to 64 bits we can use an |
| 1073 | +;; `addiw val 0`, also known as `sext.w`. |
| 1074 | +(rule 1 (extend val (ExtendOp.Signed) $I32 $I64) |
| 1075 | + (let ((val Reg (value_regs_get val 0))) |
| 1076 | + (alu_rr_imm12 (AluOPRRI.Addiw) val (imm12_const 0)))) |
| 1077 | + |
| 1078 | + |
| 1079 | +;; No point in trying to use `packh` here to zero extend 8 bit values |
| 1080 | +;; since we can just use `andi` instead which is part of the base ISA. |
| 1081 | + |
| 1082 | +;; If we have the `zbkb` extension, `packw` with the zero register as its second operand can be used to zero extend 16 bit values |
| 1083 | +(rule 1 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) |
| 1084 | + (if-let $true (has_zbkb)) |
| 1085 | + (let ((val Reg (value_regs_get val 0))) |
| 1086 | + (alu_rrr (AluOPRRR.Packw) val (zero_reg)))) |
1064 | 1087 |
|
| 1088 | +;; If we have the `zbkb` extension, `pack` with the zero register as its second operand can be used to zero extend 32 bit values to 64 bits |
| 1089 | +(rule 1 (extend val (ExtendOp.Zero) $I32 $I64) |
| 1090 | + (if-let $true (has_zbkb)) |
| 1091 | + (let ((val Reg (value_regs_get val 0))) |
| 1092 | + (alu_rrr (AluOPRRR.Pack) val (zero_reg)))) |
1065 | 1093 |
|
1066 | | -;;;; for I128 unsigned extend. |
1067 | | -(rule 1 |
1068 | | - (lower_extend r $false 64 128) |
1069 | | - (value_regs (gen_move2 r $I64 $I64) (load_u64_constant 0))) |
1070 | 1094 |
|
1071 | | -(rule |
1072 | | - (lower_extend r $false from_bits 128) |
1073 | | - (value_regs (gen_extend r $false from_bits 64) (load_u64_constant 0))) |
| 1095 | +;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. |
| 1096 | +(rule 1 (extend val (ExtendOp.Signed) $I8 (fits_in_64 _)) |
| 1097 | + (if-let $true (has_zbb)) |
| 1098 | + (let ((val Reg (value_regs_get val 0))) |
| 1099 | + (alu_rr_imm12 (AluOPRRI.Sextb) val (imm12_const 0)))) |
| 1100 | + |
| 1101 | +;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. |
| 1102 | +(rule 1 (extend val (ExtendOp.Signed) $I16 (fits_in_64 _)) |
| 1103 | + (if-let $true (has_zbb)) |
| 1104 | + (let ((val Reg (value_regs_get val 0))) |
| 1105 | + (alu_rr_imm12 (AluOPRRI.Sexth) val (imm12_const 0)))) |
| 1106 | + |
| 1107 | +;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. |
| 1108 | +(rule 2 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _)) |
| 1109 | + (if-let $true (has_zbb)) |
| 1110 | + (let ((val Reg (value_regs_get val 0))) |
| 1111 | + (alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0)))) |
| 1112 | + |
| 1113 | +;;; Signed rules extending to I128 |
| 1114 | +;; Sign-extend the low half to I64, then produce the high half by arithmetic-shifting the low half right by 63 (`srai`), which replicates its sign bit. |
| 1115 | +(rule 2 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128) |
| 1116 | + (let ((val Reg (value_regs_get val 0)) |
| 1117 | + (low Reg (extend val (ExtendOp.Signed) from_ty $I64)) |
| 1118 | + (high Reg (alu_rr_imm12 (AluOPRRI.Srai) low (imm12_const 63)))) |
| 1119 | + (value_regs low high))) |
| 1120 | + |
| 1121 | +;;; Unsigned rules extending to I128 |
| 1122 | +;; Zero-extend the low register to I64 and use a constant 0 as the high half. |
| 1123 | +(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128) |
| 1124 | + (let ((val Reg (value_regs_get val 0)) |
| 1125 | + (low Reg (extend val (ExtendOp.Zero) from_ty $I64)) |
| 1126 | + (high Reg (load_u64_constant 0))) |
| 1127 | + (value_regs low high))) |
| 1128 | + |
| 1129 | +;; Catch all rule for ignoring extensions of the same type. |
| 1130 | +(rule 4 (extend val _ ty ty) val) |
| 1131 | + |
1074 | 1132 |
|
1075 | | -;; extract the sign bit of integer. |
1076 | | -(decl ext_sign_bit (Type Reg) Reg) |
1077 | | -(extern constructor ext_sign_bit ext_sign_bit) |
1078 | 1133 |
|
1079 | 1134 | (decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs) |
1080 | 1135 | (rule |
|
1795 | 1850 | (rule (lower_icmp cc x y ty) |
1796 | 1851 | (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty)) |
1797 | 1852 |
|
1798 | | -(decl lower_icmp_over_flow (ValueRegs ValueRegs Type) Reg) |
1799 | | - |
1800 | | -;;; for I8 I16 I32 |
1801 | | -(rule 1 |
1802 | | - (lower_icmp_over_flow x y ty) |
1803 | | - (let |
1804 | | - ((tmp Reg (alu_sub (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) |
1805 | | - (tmp2 WritableReg (temp_writable_reg $I64)) |
1806 | | - (_ Unit (emit (MInst.Extend tmp2 tmp $true (ty_bits ty) 64)))) |
1807 | | - (gen_icmp (IntCC.NotEqual) (writable_reg_to_reg tmp2) tmp $I64))) |
1808 | | - |
1809 | | -;;; $I64 |
1810 | | -(rule 3 |
1811 | | - (lower_icmp_over_flow x y $I64) |
1812 | | - (let |
1813 | | - ((y_sign Reg (alu_rrr (AluOPRRR.Sgt) y (zero_reg))) |
1814 | | - (sub_result Reg (alu_sub x y)) |
1815 | | - (tmp Reg (alu_rrr (AluOPRRR.Slt) sub_result x))) |
1816 | | - (gen_icmp (IntCC.NotEqual) y_sign tmp $I64))) |
1817 | | - |
1818 | | -;;; $I128 |
1819 | | -(rule 2 |
1820 | | - (lower_icmp_over_flow x y $I128) |
1821 | | - (let |
1822 | | - ( ;; x sign bit. |
1823 | | - (xs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get x 1) (imm12_const 63))) |
1824 | | - ;; y sign bit. |
1825 | | - (ys Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get y 1) (imm12_const 63))) |
1826 | | - ;; |
1827 | | - (sub_result ValueRegs (i128_sub x y)) |
1828 | | - ;; result sign bit. |
1829 | | - (rs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get sub_result 1) (imm12_const 63))) |
1830 | | - |
1831 | | - ;;; xs && !ys && !rs |
1832 | | - ;;; x is negative, y is non-negative and result is non-negative. |
1833 | | - ;;; must overflow |
1834 | | - (tmp1 Reg (alu_and xs (alu_and (gen_bit_not ys) (gen_bit_not rs)))) |
1835 | | - ;;; !xs && ys && rs |
1836 | | - ;;; x is non-negative, y is negative and result is negative. |
1837 | | - ;;; overflow |
1838 | | - (tmp2 Reg (alu_and (gen_bit_not xs) (alu_and ys rs))) |
1839 | | - ;;;tmp3 |
1840 | | - (tmp3 Reg (alu_rrr (AluOPRRR.Or) tmp1 tmp2))) |
1841 | | - (gen_extend tmp3 $true 1 64))) |
1842 | 1853 |
|
1843 | 1854 | (decl i128_sub (ValueRegs ValueRegs) ValueRegs) |
1844 | 1855 | (rule |
|
0 commit comments