Skip to content

Commit a179f95

Browse files
authored
cranelift: 32bit div_s, rem_u, rem_s for aarch64 (bytecodealliance#9850)
* rename put_non_zero_in_reg to put_nonzero_in_reg_maybe_zext * handle 32bit sdiv * i32 rem_u/s * fix tests * fix rem * fmt * improve load_constant_full * fix tests, and review edits
1 parent 71ca453 commit a179f95

File tree

7 files changed

+180
-138
lines changed

7 files changed

+180
-138
lines changed

cranelift/codegen/src/isa/aarch64/inst.isle

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3598,12 +3598,17 @@
35983598
(orr_imm ty (zero_reg) n)
35993599
m k k))
36003600

3601-
(decl load_constant64_full (Type ImmExtend u64) Reg)
3602-
(extern constructor load_constant64_full load_constant64_full)
3601+
(decl load_constant_full (Type ImmExtend OperandSize u64) Reg)
3602+
(extern constructor load_constant_full load_constant_full)
3603+
3604+
;; Fallback for integral 32-bit constants
3605+
(rule (imm (fits_in_32 (integral_ty ty)) extend n)
3606+
(load_constant_full ty extend (operand_size $I32) n))
36033607

36043608
;; Fallback for integral 64-bit constants
3605-
(rule (imm (integral_ty ty) extend n)
3606-
(load_constant64_full ty extend n))
3609+
(rule -1 (imm (integral_ty $I64) extend n)
3610+
(load_constant_full $I64 extend (operand_size $I64) n))
3611+
36073612

36083613
;; Sign extension helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36093614

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,8 +758,7 @@ impl MachInstEmit for Inst {
758758
ALUOp::EorNot => 0b01001010_001,
759759
ALUOp::AddS => 0b00101011_000,
760760
ALUOp::SubS => 0b01101011_000,
761-
ALUOp::SDiv => 0b10011010_110,
762-
ALUOp::UDiv => 0b00011010_110,
761+
ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,
763762
ALUOp::RotR | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
764763
ALUOp::SMulH => 0b10011011_010,
765764
ALUOp::UMulH => 0b10011011_110,

cranelift/codegen/src/isa/aarch64/lower.isle

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,43 +1028,48 @@
10281028

10291029
;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10301030

1031-
;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
1032-
;; CLIF's `udiv` the check for zero needs to be manually performed.
1033-
1034-
(rule udiv 1 (lower (has_type $I64 (udiv x y)))
1035-
(a64_udiv $I64 (put_in_reg x) (put_nonzero_in_reg y)))
1031+
;; Enum representing the types of extensions
1032+
(type ExtType
1033+
(enum
1034+
(Signed)
1035+
(Unsigned)))
10361036

1037-
(rule udiv (lower (has_type (fits_in_32 ty) (udiv x y)))
1038-
(a64_udiv $I32 (put_in_reg_zext32 x) (put_nonzero_in_reg y)))
1039-
1040-
;; helpers for udiv:
10411037
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
1042-
(decl put_nonzero_in_reg (Value) Reg)
1038+
;; It takes a value and extension type, and performs the appropriate checks.
1039+
;; TODO: restore spec
1040+
; (spec (put_nonzero_in_reg_sext64 x)
1041+
; (provide (= (sign_ext 64 x) result))
1042+
; (require (not (= #x0000000000000000 result))))
1043+
(decl put_nonzero_in_reg (Value ExtType Type) Reg)
10431044

10441045
;; Special case where if a `Value` is known to be nonzero we can trivially
10451046
;; move it into a register.
1046-
(rule (put_nonzero_in_reg (and (value_type ty) (iconst (nonzero_u64_from_imm64 n))))
1047+
1048+
;; zero-extend non-zero constant
1049+
(rule (put_nonzero_in_reg (iconst (nonzero_u64_from_imm64 n)) (ExtType.Unsigned) ty)
10471050
(imm ty (ImmExtend.Zero) n))
10481051

1049-
(rule -1 (put_nonzero_in_reg (and (value_type $I64) val))
1052+
;; sign-extend non-zero constant
1053+
(rule (put_nonzero_in_reg (iconst (nonzero_u64_from_imm64 n)) (ExtType.Signed) ty)
1054+
(imm ty (ImmExtend.Sign) n))
1055+
1056+
(rule -1 (put_nonzero_in_reg val _ $I64)
10501057
(trap_if_zero_divisor (put_in_reg val) (operand_size $I64)))
10511058

1052-
(rule -2 (put_nonzero_in_reg (and (value_type (fits_in_32 _)) val))
1059+
(rule -2 (put_nonzero_in_reg val (ExtType.Signed) (fits_in_32 _))
1060+
(trap_if_zero_divisor (put_in_reg_sext32 val) (operand_size $I32)))
1061+
1062+
(rule -2 (put_nonzero_in_reg val (ExtType.Unsigned) (fits_in_32 _))
10531063
(trap_if_zero_divisor (put_in_reg_zext32 val) (operand_size $I32)))
10541064

1055-
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero and extending it to 64 bits.
1056-
(spec (put_nonzero_in_reg_zext64 x)
1057-
(provide (= result (zero_ext 64 x)))
1058-
(require (not (= result #x0000000000000000))))
1059-
(decl put_nonzero_in_reg_zext64 (Value) Reg)
1060-
(rule -1 (put_nonzero_in_reg_zext64 (and (value_type ty) val))
1061-
(trap_if_zero_divisor (put_in_reg_zext64 val) (operand_size ty)))
1065+
;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
1066+
;; CLIF's `udiv` the check for zero needs to be manually performed.
10621067

1063-
;; Special case where if a `Value` is known to be nonzero we can trivially
1064-
;; move it into a register.
1065-
(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
1066-
(iconst (nonzero_u64_from_imm64 n))))
1067-
(imm ty (ImmExtend.Zero) n))
1068+
(rule udiv 1 (lower (has_type $I64 (udiv x y)))
1069+
(a64_udiv $I64 (put_in_reg x) (put_nonzero_in_reg y (ExtType.Unsigned) $I64)))
1070+
1071+
(rule udiv (lower (has_type (fits_in_32 ty) (udiv x y)))
1072+
(a64_udiv $I32 (put_in_reg_zext32 x) (put_nonzero_in_reg y (ExtType.Unsigned) ty)))
10681073

10691074
;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10701075

@@ -1088,33 +1093,34 @@
10881093
;;
10891094
;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
10901095
;; necessary, but right now `y` is checked to not be -1 as well.
1091-
(rule sdiv_base_case (lower (has_type (fits_in_64 ty) (sdiv x y)))
1096+
1097+
(rule sdiv_base_case (lower (has_type $I64 (sdiv x y)))
10921098
(let ((x64 Reg (put_in_reg_sext64 x))
1093-
(y64 Reg (put_nonzero_in_reg_sext64 y))
1094-
(intmin_check_x Reg (intmin_check ty x64))
1095-
(valid_x64 Reg (trap_if_div_overflow ty intmin_check_x x64 y64))
1099+
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) $I64))
1100+
(intmin_check_x Reg (intmin_check $I64 x64))
1101+
(valid_x64 Reg (trap_if_div_overflow $I64 intmin_check_x x64 y64))
10961102
(result Reg (a64_sdiv $I64 valid_x64 y64)))
10971103
result))
10981104

1105+
(rule sdiv_base_case -1 (lower (has_type (fits_in_32 ty) (sdiv x y)))
1106+
(let ((x32 Reg (put_in_reg_sext32 x))
1107+
(y32 Reg (put_nonzero_in_reg y (ExtType.Signed) ty))
1108+
(intmin_check_x Reg (intmin_check ty x32))
1109+
(valid_x32 Reg (trap_if_div_overflow ty intmin_check_x x32 y32))
1110+
(result Reg (a64_sdiv ty valid_x32 y32)))
1111+
result))
1112+
10991113
;; Special case for `sdiv` where no checks are needed due to division by a
11001114
;; constant meaning the checks are always passed.
1101-
(rule sdiv_safe_divisor 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst imm))))
1115+
(rule sdiv_safe_divisor 2 (lower (has_type $I64 (sdiv x (iconst imm))))
1116+
(if-let y (safe_divisor_from_imm64 $I64 imm))
1117+
(a64_sdiv $I64 (put_in_reg_sext64 x) (imm $I64 (ImmExtend.Sign) y)))
1118+
1119+
(rule sdiv_safe_divisor 1 (lower (has_type (fits_in_32 ty) (sdiv x (iconst imm))))
11021120
(if-let y (safe_divisor_from_imm64 ty imm))
1103-
(a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))
1121+
(a64_sdiv ty (put_in_reg_sext32 x) (imm ty (ImmExtend.Sign) y)))
11041122

11051123
;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
1106-
(spec (put_nonzero_in_reg_sext64 x)
1107-
(provide (= (sign_ext 64 x) result))
1108-
(require (not (= #x0000000000000000 result))))
1109-
(decl put_nonzero_in_reg_sext64 (Value) Reg)
1110-
(rule -1 (put_nonzero_in_reg_sext64 val)
1111-
(trap_if_zero_divisor (put_in_reg_sext64 val) (operand_size $I64)))
1112-
1113-
;; Note that this has a special case where if the `Value` is a constant that's
1114-
;; not zero we can skip the zero check.
1115-
(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
1116-
(iconst (nonzero_u64_from_imm64 n))))
1117-
(imm ty (ImmExtend.Sign) n))
11181124

11191125
;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11201126

@@ -1130,20 +1136,36 @@
11301136
;; div rd, x, y ; rd = x / y
11311137
;; msub rd, rd, y, x ; rd = x - rd * y
11321138

1133-
(rule urem (lower (has_type (fits_in_64 ty) (urem x y)))
1139+
;; TODO: we can avoid a 0 check, if the dividend is a non-0 constant
1140+
1141+
(rule urem (lower (has_type $I64 (urem x y)))
11341142
(let ((x64 Reg (put_in_reg_zext64 x))
1135-
(y64 Reg (put_nonzero_in_reg_zext64 y))
1143+
(y64 Reg (put_nonzero_in_reg y (ExtType.Unsigned) $I64))
11361144
(div Reg (a64_udiv $I64 x64 y64))
11371145
(result Reg (msub $I64 div y64 x64)))
11381146
result))
11391147

1140-
(rule srem (lower (has_type (fits_in_64 ty) (srem x y)))
1148+
(rule urem -1 (lower (has_type (fits_in_32 ty) (urem x y)))
1149+
(let ((x64 Reg (put_in_reg_zext32 x))
1150+
(y64 Reg (put_nonzero_in_reg y (ExtType.Unsigned) ty))
1151+
(div Reg (a64_udiv ty x64 y64))
1152+
(result Reg (msub ty div y64 x64)))
1153+
result))
1154+
1155+
(rule srem (lower (has_type $I64 (srem x y)))
11411156
(let ((x64 Reg (put_in_reg_sext64 x))
1142-
(y64 Reg (put_nonzero_in_reg_sext64 y))
1157+
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) $I64))
11431158
(div Reg (a64_sdiv $I64 x64 y64))
11441159
(result Reg (msub $I64 div y64 x64)))
11451160
result))
11461161

1162+
(rule srem -1 (lower (has_type (fits_in_32 ty) (srem x y)))
1163+
(let ((x64 Reg (put_in_reg_sext32 x))
1164+
(y64 Reg (put_nonzero_in_reg y (ExtType.Signed) ty))
1165+
(div Reg (a64_sdiv ty x64 y64))
1166+
(result Reg (msub ty div y64 x64)))
1167+
result))
1168+
11471169
;;; Rules for integer min/max: umin, smin, umax, smax ;;;;;;;;;;;;;;;;;;;;;;;;;
11481170

11491171
;; `i64` and smaller.

cranelift/codegen/src/isa/aarch64/lower/isle.rs

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
// Pull in the ISLE generated code.
44
pub mod generated_code;
5-
use generated_code::Context;
5+
use generated_code::{Context, ImmExtend};
66

77
// Types that the generated ISLE code uses via `use super::*`.
88
use super::{
@@ -30,6 +30,7 @@ use crate::{
3030
abi::ArgPair, ty_bits, InstOutput, IsTailCall, MachInst, VCodeConstant, VCodeConstantData,
3131
},
3232
};
33+
use core::u32;
3334
use regalloc2::PReg;
3435
use std::boxed::Box;
3536
use std::vec::Vec;
@@ -206,24 +207,36 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
206207
///
207208
/// The logic here is nontrivial enough that it's not really worth porting
208209
/// this over to ISLE.
209-
fn load_constant64_full(
210+
fn load_constant_full(
210211
&mut self,
211212
ty: Type,
212-
extend: &generated_code::ImmExtend,
213+
extend: &ImmExtend,
214+
extend_to: &OperandSize,
213215
value: u64,
214216
) -> Reg {
215217
let bits = ty.bits();
216-
let value = if bits < 64 {
217-
if *extend == generated_code::ImmExtend::Sign {
218+
219+
let value = match (extend_to, *extend) {
220+
(OperandSize::Size32, ImmExtend::Sign) if bits < 32 => {
221+
let shift = 32 - bits;
222+
let value = value as i32;
223+
224+
// we cast first to a u32 and then to a u64, to ensure that we are representing a
225+
// i32 in a u64, and not a i64. This is important, otherwise value will not fit in
226+
// 32 bits
227+
((value << shift) >> shift) as u32 as u64
228+
}
229+
(OperandSize::Size32, ImmExtend::Zero) if bits < 32 => {
230+
value & !((u32::MAX as u64) << bits)
231+
}
232+
(OperandSize::Size64, ImmExtend::Sign) if bits < 64 => {
218233
let shift = 64 - bits;
219234
let value = value as i64;
220235

221236
((value << shift) >> shift) as u64
222-
} else {
223-
value & !(u64::MAX << bits)
224237
}
225-
} else {
226-
value
238+
(OperandSize::Size64, ImmExtend::Zero) if bits < 64 => value & !(u64::MAX << bits),
239+
_ => value,
227240
};
228241

229242
// Divide the value into 16-bit slices that we can manipulate using

cranelift/filetests/filetests/isa/aarch64/arithmetic.clif

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -215,24 +215,20 @@ block0(v0: i32, v1: i32):
215215

216216
; VCode:
217217
; block0:
218-
; sxtw x3, w0
219-
; sxtw x5, w1
220-
; cbz x5, #trap=int_divz
221-
; adds wzr, w5, #1
222-
; ccmp w3, #1, #nzcv, eq
218+
; cbz w1, #trap=int_divz
219+
; adds wzr, w1, #1
220+
; ccmp w0, #1, #nzcv, eq
223221
; b.vs #trap=int_ovf
224-
; sdiv x0, x3, x5
222+
; sdiv w0, w0, w1
225223
; ret
226224
;
227225
; Disassembled:
228226
; block0: ; offset 0x0
229-
; sxtw x3, w0
230-
; sxtw x5, w1
231-
; cbz x5, #0x20
232-
; cmn w5, #1
233-
; ccmp w3, #1, #0, eq
234-
; b.vs #0x24
235-
; sdiv x0, x3, x5
227+
; cbz w1, #0x18
228+
; cmn w1, #1
229+
; ccmp w0, #1, #0, eq
230+
; b.vs #0x1c
231+
; sdiv w0, w0, w1
236232
; ret
237233
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
238234
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
@@ -246,16 +242,14 @@ block0(v0: i32):
246242

247243
; VCode:
248244
; block0:
249-
; sxtw x2, w0
250-
; movz w4, #2
251-
; sdiv x0, x2, x4
245+
; movz w2, #2
246+
; sdiv w0, w0, w2
252247
; ret
253248
;
254249
; Disassembled:
255250
; block0: ; offset 0x0
256-
; sxtw x2, w0
257-
; mov w4, #2
258-
; sdiv x0, x2, x4
251+
; mov w2, #2
252+
; sdiv w0, w0, w2
259253
; ret
260254

261255
function %f14(i32, i32) -> i32 {
@@ -304,20 +298,16 @@ block0(v0: i32, v1: i32):
304298

305299
; VCode:
306300
; block0:
307-
; sxtw x3, w0
308-
; sxtw x5, w1
309-
; cbz x5, #trap=int_divz
310-
; sdiv x8, x3, x5
311-
; msub x0, x8, x5, x3
301+
; cbz w1, #trap=int_divz
302+
; sdiv w4, w0, w1
303+
; msub w0, w4, w1, w0
312304
; ret
313305
;
314306
; Disassembled:
315307
; block0: ; offset 0x0
316-
; sxtw x3, w0
317-
; sxtw x5, w1
318-
; cbz x5, #0x18
319-
; sdiv x8, x3, x5
320-
; msub x0, x8, x5, x3
308+
; cbz w1, #0x10
309+
; sdiv w4, w0, w1
310+
; msub w0, w4, w1, w0
321311
; ret
322312
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
323313

@@ -329,20 +319,16 @@ block0(v0: i32, v1: i32):
329319

330320
; VCode:
331321
; block0:
332-
; mov w3, w0
333-
; mov w5, w1
334-
; cbz w5, #trap=int_divz
335-
; udiv x8, x3, x5
336-
; msub x0, x8, x5, x3
322+
; cbz w1, #trap=int_divz
323+
; udiv w4, w0, w1
324+
; msub w0, w4, w1, w0
337325
; ret
338326
;
339327
; Disassembled:
340328
; block0: ; offset 0x0
341-
; mov w3, w0
342-
; mov w5, w1
343-
; cbz w5, #0x18
344-
; udiv x8, x3, x5
345-
; msub x0, x8, x5, x3
329+
; cbz w1, #0x10
330+
; udiv w4, w0, w1
331+
; msub w0, w4, w1, w0
346332
; ret
347333
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
348334

@@ -795,3 +781,24 @@ block0(v0: i64):
795781
; ret
796782
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
797783

784+
function %sdiv_i16_const(i16) -> i16 {
785+
block0(v0: i16):
786+
v1 = iconst.i16 -2
787+
v2 = sdiv v0, v1
788+
return v2
789+
}
790+
791+
; VCode:
792+
; block0:
793+
; sxth w2, w0
794+
; movn w4, #1
795+
; sdiv w0, w2, w4
796+
; ret
797+
;
798+
; Disassembled:
799+
; block0: ; offset 0x0
800+
; sxth w2, w0
801+
; mov w4, #-2
802+
; sdiv w0, w2, w4
803+
; ret
804+

0 commit comments

Comments
 (0)