Skip to content

Commit f6c6bc2

Browse files
authored
riscv64: Improve signed and zero extend codegen (bytecodealliance#5844)
* riscv64: Remove unused code * riscv64: Group extend rules * riscv64: Remove more unused rules * riscv64: Cleanup existing extension rules * riscv64: Move the existing Extend rules to ISLE * riscv64: Use `sext.w` when extending * riscv64: Remove duplicate extend tests * riscv64: Use `zbb` instructions when extending values * riscv64: Use `zbkb` extensions when zero extending * riscv64: Enable additional tests for extend i128 * riscv64: Fix formatting for `Inst::Extend` * riscv64: Reverse register for pack * riscv64: Misc Cleanups * riscv64: Cleanup extend rules
1 parent 6e6a103 commit f6c6bc2

File tree

68 files changed

+1918
-1581
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+1918
-1581
lines changed

cranelift/codegen/src/isa/riscv64/inst.isle

Lines changed: 107 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@
569569
(Clmul)
570570
(Clmulh)
571571
(Clmulr)
572+
573+
;; Zbkb: Bit-manipulation for Cryptography
574+
(Pack)
575+
(Packw)
576+
(Packh)
572577
))
573578

574579

@@ -858,22 +863,6 @@
858863
(_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero)))))
859864
dst))
860865

861-
;; extend int if need.
862-
(decl ext_int_if_need (bool ValueRegs Type) ValueRegs)
863-
;;; for I8, I16, and I32 ...
864-
(rule -1
865-
(ext_int_if_need signed val ty)
866-
(gen_extend val signed (ty_bits ty) 64))
867-
;;; otherwise this is a I64 or I128
868-
;;; no need to extend.
869-
(rule
870-
(ext_int_if_need _ r $I64)
871-
r)
872-
(rule
873-
(ext_int_if_need _ r $I128)
874-
r)
875-
876-
877866
;; Helper for get negative of Imm12
878867
(decl neg_imm12 (Imm12) Imm12)
879868
(extern constructor neg_imm12 neg_imm12)
@@ -1031,50 +1020,116 @@
10311020
;; add low and high together.
10321021
(result Reg (alu_add high low)))
10331022
(value_regs result (load_u64_constant 0))))
1023+
1024+
;; Extends an integer if it is smaller than 64 bits.
1025+
(decl ext_int_if_need (bool ValueRegs Type) ValueRegs)
1026+
;;; For values smaller than 64 bits, we need to extend them to 64 bits
1027+
(rule 0 (ext_int_if_need $true val (fits_in_32 (ty_int ty)))
1028+
(sext val ty $I64))
1029+
(rule 0 (ext_int_if_need $false val (fits_in_32 (ty_int ty)))
1030+
(zext val ty $I64))
1031+
;; If the value is already 64 bits or wider, we don't need to do anything
1032+
(rule 1 (ext_int_if_need _ r $I64) r)
1033+
(rule 2 (ext_int_if_need _ r $I128) r)
10341034

1035-
(decl gen_extend (Reg bool u8 u8) Reg)
1036-
(rule
1037-
(gen_extend r is_signed from_bits to_bits)
1038-
(let
1039-
((tmp WritableReg (temp_writable_reg $I16))
1040-
(_ Unit (emit (MInst.Extend tmp r is_signed from_bits to_bits))))
1041-
tmp))
10421035

1043-
;; val is_signed from_bits to_bits
1044-
(decl lower_extend (Reg bool u8 u8) ValueRegs)
1045-
(rule -1
1046-
(lower_extend r is_signed from_bits to_bits)
1047-
(gen_extend r is_signed from_bits to_bits))
1036+
;; Performs a zero extension of the given value
1037+
(decl zext (ValueRegs Type Type) ValueRegs)
1038+
(rule (zext val from_ty to_ty) (extend val (ExtendOp.Zero) from_ty to_ty))
10481039

1049-
;;;; for I128 signed extend.
1050-
(rule 1
1051-
(lower_extend r $true 64 128)
1052-
(let
1053-
((tmp Reg (alu_rrr (AluOPRRR.Slt) r (zero_reg)))
1054-
(high Reg (gen_extend tmp $true 1 64)))
1055-
(value_regs (gen_move2 r $I64 $I64) high)))
1040+
;; Performs a signed extension of the given value
1041+
(decl sext (ValueRegs Type Type) ValueRegs)
1042+
(rule (sext val from_ty to_ty) (extend val (ExtendOp.Signed) from_ty to_ty))
10561043

1057-
(rule
1058-
(lower_extend r $true from_bits 128)
1059-
(let
1060-
((tmp Reg (gen_extend r $true from_bits 64))
1061-
(tmp2 Reg (alu_rrr (AluOPRRR.Slt) tmp (zero_reg)))
1062-
(high Reg (gen_extend tmp2 $true 1 64)))
1063-
(value_regs (gen_move2 tmp $I64 $I64) high)))
1044+
(type ExtendOp
1045+
(enum
1046+
(Zero)
1047+
(Signed)))
1048+
1049+
;; Performs either a sign or zero extension of the given value
1050+
(decl extend (ValueRegs ExtendOp Type Type) ValueRegs)
1051+
1052+
;;; Generic Rules Extending to I64
1053+
(decl pure extend_shift_op (ExtendOp) AluOPRRI)
1054+
(rule (extend_shift_op (ExtendOp.Zero)) (AluOPRRI.Srli))
1055+
(rule (extend_shift_op (ExtendOp.Signed)) (AluOPRRI.Srai))
1056+
1057+
;; In the most generic case, we shift left and then shift right.
1058+
;; The type of right shift is determined by the extend op.
1059+
(rule 0 (extend val extend_op (fits_in_32 from_ty) (fits_in_64 to_ty))
1060+
(let ((val Reg (value_regs_get val 0))
1061+
(shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits from_ty))))
1062+
(left Reg (alu_rr_imm12 (AluOPRRI.Slli) val shift))
1063+
(shift_op AluOPRRI (extend_shift_op extend_op))
1064+
(right Reg (alu_rr_imm12 shift_op left shift)))
1065+
right))
1066+
1067+
;; If we are zero extending an 8-bit value we can use an `andi` instruction.
1068+
(rule 1 (extend val (ExtendOp.Zero) $I8 (fits_in_64 to_ty))
1069+
(let ((val Reg (value_regs_get val 0)))
1070+
(alu_rr_imm12 (AluOPRRI.Andi) val (imm12_const 255))))
1071+
1072+
;; When sign extending from 32 to 64 bits we can use a
1073+
;; `addiw val 0`. Also known as a `sext.w`
1074+
(rule 1 (extend val (ExtendOp.Signed) $I32 $I64)
1075+
(let ((val Reg (value_regs_get val 0)))
1076+
(alu_rr_imm12 (AluOPRRI.Addiw) val (imm12_const 0))))
1077+
1078+
1079+
;; No point in trying to use `packh` here to zero extend 8 bit values
1080+
;; since we can just use `andi` instead which is part of the base ISA.
1081+
1082+
;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values
1083+
(rule 1 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _))
1084+
(if-let $true (has_zbkb))
1085+
(let ((val Reg (value_regs_get val 0)))
1086+
(alu_rrr (AluOPRRR.Packw) val (zero_reg))))
10641087

1088+
;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers
1089+
(rule 1 (extend val (ExtendOp.Zero) $I32 $I64)
1090+
(if-let $true (has_zbkb))
1091+
(let ((val Reg (value_regs_get val 0)))
1092+
(alu_rrr (AluOPRRR.Pack) val (zero_reg))))
10651093

1066-
;;;; for I128 unsigned extend.
1067-
(rule 1
1068-
(lower_extend r $false 64 128)
1069-
(value_regs (gen_move2 r $I64 $I64) (load_u64_constant 0)))
10701094

1071-
(rule
1072-
(lower_extend r $false from_bits 128)
1073-
(value_regs (gen_extend r $false from_bits 64) (load_u64_constant 0)))
1095+
;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction.
1096+
(rule 1 (extend val (ExtendOp.Signed) $I8 (fits_in_64 _))
1097+
(if-let $true (has_zbb))
1098+
(let ((val Reg (value_regs_get val 0)))
1099+
(alu_rr_imm12 (AluOPRRI.Sextb) val (imm12_const 0))))
1100+
1101+
;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction.
1102+
(rule 1 (extend val (ExtendOp.Signed) $I16 (fits_in_64 _))
1103+
(if-let $true (has_zbb))
1104+
(let ((val Reg (value_regs_get val 0)))
1105+
(alu_rr_imm12 (AluOPRRI.Sexth) val (imm12_const 0))))
1106+
1107+
;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction.
1108+
(rule 2 (extend val (ExtendOp.Zero) $I16 (fits_in_64 _))
1109+
(if-let $true (has_zbb))
1110+
(let ((val Reg (value_regs_get val 0)))
1111+
(alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0))))
1112+
1113+
;;; Signed rules extending to I128
1114+
;; Extend the bottom part, and extract the sign bit from the bottom as the top
1115+
(rule 2 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128)
1116+
(let ((val Reg (value_regs_get val 0))
1117+
(low Reg (extend val (ExtendOp.Signed) from_ty $I64))
1118+
(high Reg (alu_rr_imm12 (AluOPRRI.Srai) low (imm12_const 63))))
1119+
(value_regs low high)))
1120+
1121+
;;; Unsigned rules extending to I128
1122+
;; Extend the bottom register to I64 and then just zero out the top half.
1123+
(rule 3 (extend val (ExtendOp.Zero) (fits_in_64 from_ty) $I128)
1124+
(let ((val Reg (value_regs_get val 0))
1125+
(low Reg (extend val (ExtendOp.Zero) from_ty $I64))
1126+
(high Reg (load_u64_constant 0)))
1127+
(value_regs low high)))
1128+
1129+
;; Catch all rule for ignoring extensions of the same type.
1130+
(rule 4 (extend val _ ty ty) val)
1131+
10741132

1075-
;; extract the sign bit of integer.
1076-
(decl ext_sign_bit (Type Reg) Reg)
1077-
(extern constructor ext_sign_bit ext_sign_bit)
10781133

10791134
(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs)
10801135
(rule
@@ -1795,50 +1850,6 @@
17951850
(rule (lower_icmp cc x y ty)
17961851
(gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty))
17971852

1798-
(decl lower_icmp_over_flow (ValueRegs ValueRegs Type) Reg)
1799-
1800-
;;; for I8 I16 I32
1801-
(rule 1
1802-
(lower_icmp_over_flow x y ty)
1803-
(let
1804-
((tmp Reg (alu_sub (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
1805-
(tmp2 WritableReg (temp_writable_reg $I64))
1806-
(_ Unit (emit (MInst.Extend tmp2 tmp $true (ty_bits ty) 64))))
1807-
(gen_icmp (IntCC.NotEqual) (writable_reg_to_reg tmp2) tmp $I64)))
1808-
1809-
;;; $I64
1810-
(rule 3
1811-
(lower_icmp_over_flow x y $I64)
1812-
(let
1813-
((y_sign Reg (alu_rrr (AluOPRRR.Sgt) y (zero_reg)))
1814-
(sub_result Reg (alu_sub x y))
1815-
(tmp Reg (alu_rrr (AluOPRRR.Slt) sub_result x)))
1816-
(gen_icmp (IntCC.NotEqual) y_sign tmp $I64)))
1817-
1818-
;;; $I128
1819-
(rule 2
1820-
(lower_icmp_over_flow x y $I128)
1821-
(let
1822-
( ;; x sign bit.
1823-
(xs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get x 1) (imm12_const 63)))
1824-
;; y sign bit.
1825-
(ys Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get y 1) (imm12_const 63)))
1826-
;;
1827-
(sub_result ValueRegs (i128_sub x y))
1828-
;; result sign bit.
1829-
(rs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get sub_result 1) (imm12_const 63)))
1830-
1831-
;;; xs && !ys && !rs
1832-
;;; x is negative, y is non-negative, and the result is non-negative.
1833-
;;; must overflow
1834-
(tmp1 Reg (alu_and xs (alu_and (gen_bit_not ys) (gen_bit_not rs))))
1835-
;;; !xs && ys && rs
1836-
;;; x is non-negative, y is negative, and the result is negative.
1837-
;;; overflow
1838-
(tmp2 Reg (alu_and (gen_bit_not xs) (alu_and ys rs)))
1839-
;;;tmp3
1840-
(tmp3 Reg (alu_rrr (AluOPRRR.Or) tmp1 tmp2)))
1841-
(gen_extend tmp3 $true 1 64)))
18421853

18431854
(decl i128_sub (ValueRegs ValueRegs) ValueRegs)
18441855
(rule

cranelift/codegen/src/isa/riscv64/inst/args.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,9 @@ impl AluOPRRR {
746746
Self::Sh3add => "sh3add",
747747
Self::Sh3adduw => "sh3add.uw",
748748
Self::Xnor => "xnor",
749+
Self::Pack => "pack",
750+
Self::Packw => "packw",
751+
Self::Packh => "packh",
749752
}
750753
}
751754

@@ -785,6 +788,7 @@ impl AluOPRRR {
785788
AluOPRRR::Remw => 0b110,
786789
AluOPRRR::Remuw => 0b111,
787790

791+
// Zbb
788792
AluOPRRR::Adduw => 0b000,
789793
AluOPRRR::Andn => 0b111,
790794
AluOPRRR::Bclr => 0b001,
@@ -810,6 +814,11 @@ impl AluOPRRR {
810814
AluOPRRR::Sh3add => 0b110,
811815
AluOPRRR::Sh3adduw => 0b110,
812816
AluOPRRR::Xnor => 0b100,
817+
818+
// Zbkb
819+
AluOPRRR::Pack => 0b100,
820+
AluOPRRR::Packw => 0b100,
821+
AluOPRRR::Packh => 0b111,
813822
}
814823
}
815824

@@ -826,11 +835,16 @@ impl AluOPRRR {
826835
| AluOPRRR::Srl
827836
| AluOPRRR::Sra
828837
| AluOPRRR::Or
829-
| AluOPRRR::And => 0b0110011,
838+
| AluOPRRR::And
839+
| AluOPRRR::Pack
840+
| AluOPRRR::Packh => 0b0110011,
830841

831-
AluOPRRR::Addw | AluOPRRR::Subw | AluOPRRR::Sllw | AluOPRRR::Srlw | AluOPRRR::Sraw => {
832-
0b0111011
833-
}
842+
AluOPRRR::Addw
843+
| AluOPRRR::Subw
844+
| AluOPRRR::Sllw
845+
| AluOPRRR::Srlw
846+
| AluOPRRR::Sraw
847+
| AluOPRRR::Packw => 0b0111011,
834848

835849
AluOPRRR::Mul
836850
| AluOPRRR::Mulh
@@ -937,6 +951,11 @@ impl AluOPRRR {
937951
AluOPRRR::Sh3add => 0b0010000,
938952
AluOPRRR::Sh3adduw => 0b0010000,
939953
AluOPRRR::Xnor => 0b0100000,
954+
955+
// Zbkb
956+
AluOPRRR::Pack => 0b0000100,
957+
AluOPRRR::Packw => 0b0000100,
958+
AluOPRRR::Packh => 0b0000100,
940959
}
941960
}
942961

cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,38 @@ fn test_riscv64_binemit() {
514514
0x400545b3,
515515
));
516516

517+
// Zbkb
518+
insns.push(TestUnit::new(
519+
Inst::AluRRR {
520+
alu_op: AluOPRRR::Pack,
521+
rd: writable_a1(),
522+
rs1: a0(),
523+
rs2: zero_reg(),
524+
},
525+
"pack a1,a0,zero",
526+
0x080545b3,
527+
));
528+
insns.push(TestUnit::new(
529+
Inst::AluRRR {
530+
alu_op: AluOPRRR::Packw,
531+
rd: writable_a1(),
532+
rs1: a0(),
533+
rs2: zero_reg(),
534+
},
535+
"packw a1,a0,zero",
536+
0x080545bb,
537+
));
538+
insns.push(TestUnit::new(
539+
Inst::AluRRR {
540+
alu_op: AluOPRRR::Packh,
541+
rd: writable_a1(),
542+
rs1: a0(),
543+
rs2: zero_reg(),
544+
},
545+
"packh a1,a0,zero",
546+
0x080575b3,
547+
));
548+
517549
//
518550
insns.push(TestUnit::new(
519551
Inst::AluRRR {

0 commit comments

Comments
 (0)