Commit c65ba68

x64: Fix lowering rules for `shld` (#12321)

* x64: Fix lowering rules for `shld`

  These rules failed to account for the edge case of shift amounts of 0
  or width(type), where the `shld` instruction is no longer applicable.
  Guards are added to ensure that both shift amounts are strictly
  greater than zero. In theory this shouldn't have much practical
  impact, since shift-by-zero and shift-by-type-width are both
  optimized away in the mid-end; the bug is only exposable at
  opt-level=0, which may help explain why it went undiscovered for
  ~1 year.

  Closes #12318

* Fix Winch's encoding of shift-by-32
1 parent 7981473 commit c65ba68
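
The mismatch is easiest to see outside of ISLE. The following is a
minimal Rust sketch, not Wasmtime code (`clif_pattern` and `shld32` are
ad-hoc reference models), of why the matched pattern stops denoting a
funnel shift at the boundary shift amounts:

    /// What the matched CLIF pattern `(x << xs) | (y >> ys)` computes:
    /// CLIF shifts mask their amount modulo the type width, which
    /// `wrapping_shl`/`wrapping_shr` reproduce for `u32`.
    fn clif_pattern(x: u32, y: u32, xs: u32, ys: u32) -> u32 {
        x.wrapping_shl(xs) | y.wrapping_shr(ys)
    }

    /// Reference semantics of x86 `shld` on 32-bit operands: shift `x`
    /// left by `count`, filling the vacated low bits from the high
    /// bits of `y`; a count of 0 leaves `x` untouched.
    fn shld32(x: u32, y: u32, count: u32) -> u32 {
        if count == 0 {
            x
        } else {
            (x << count) | (y >> (32 - count))
        }
    }

    fn main() {
        let (x, y) = (1u32, 24u32);
        // For interior shift amounts the two agree, so `shld` is a
        // valid lowering:
        assert_eq!(clif_pattern(x, y, 8, 24), shld32(x, y, 8));
        // At the boundary xs = 0, ys = 32 they diverge: the masked
        // shift-by-32 acts as shift-by-0, so the pattern means
        // `x | y` (25), while `shld` by 0 is just `x` (1).
        assert_eq!(clif_pattern(x, y, 0, 32), 25);
        assert_eq!(shld32(x, y, 0), 1);
    }

The new `u64_gt` guards simply exclude those boundary amounts so the
pattern falls through to the generic shift-and-or lowering.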

File tree: 4 files changed (+97 additions, -2 deletions)

cranelift/codegen/src/isa/x64/lower.isle (4 additions, 0 deletions)

@@ -497,10 +497,14 @@
 (rule 8 (lower (has_type (ty_int_ref_16_to_64 ty)
                          (bor (ishl x (u8_from_iconst xs)) (ushr y (u8_from_iconst ys)))))
       (if-let true (u64_eq (ty_bits ty) (u64_wrapping_add xs ys)))
+      (if-let true (u64_gt xs 0))
+      (if-let true (u64_gt ys 0))
       (x64_shld ty x y xs))
 (rule 8 (lower (has_type (ty_int_ref_16_to_64 ty)
                          (bor (ushr y (u8_from_iconst ys)) (ishl x (u8_from_iconst xs)))))
       (if-let true (u64_eq (ty_bits ty) (u64_wrapping_add xs ys)))
+      (if-let true (u64_gt xs 0))
+      (if-let true (u64_gt ys 0))
       (x64_shld ty x y xs))

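Restated in plain Rust (an illustrative predicate, not a Cranelift
helper), the patched rule now fires only when the constant shift
amounts sum to the type width and are both strictly positive:

    // Ad-hoc restatement of the rule's `if-let` guards above.
    fn shld_rule_applies(ty_bits: u64, xs: u64, ys: u64) -> bool {
        ty_bits == xs.wrapping_add(ys) && xs > 0 && ys > 0
    }

    fn main() {
        assert!(shld_rule_applies(32, 8, 24));  // interior case: `shld` is valid
        assert!(!shld_rule_applies(32, 0, 32)); // boundary case now excluded
        assert!(!shld_rule_applies(32, 32, 0)); // ...in either orientation
    }
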
cranelift/filetests/filetests/isa/x64/shld.clif (67 additions, 0 deletions)

@@ -237,3 +237,70 @@ block0(v0: i8, v1: i8):
 ;   popq %rbp
 ;   retq
 
+function %x64_shld_i32_32(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = ushr_imm v0, 32
+    v3 = ishl_imm v1, 0
+    v4 = bor v2, v3
+    return v4
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   shrl $0x0, %edi
+;   shll $0x0, %esi
+;   movq %rdi, %rax
+;   orl %esi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   shrl $0, %edi
+;   shll $0, %esi
+;   movq %rdi, %rax
+;   orl %esi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+
+function %x64_shld_i32_32_swap(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = ishl_imm v1, 0
+    v3 = ushr_imm v0, 32
+    v4 = bor v2, v3
+    return v4
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   shll $0x0, %esi
+;   shrl $0x0, %edi
+;   movq %rsi, %rax
+;   orl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   shll $0, %esi
+;   shrl $0, %edi
+;   movq %rsi, %rax
+;   orl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
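
The new filetest cases pin the boundary lowering to two plain shifts
plus an `or` rather than an `shld`. A quick Rust check (a sketch of the
instruction semantics only, not repository code) confirms that this
sequence matches CLIF's masked-shift meaning:

    // Mirrors the expected VCode: shrl $0x0, shll $0x0, then orl.
    #[allow(clippy::identity_op)]
    fn lowered_boundary_case(v0: u32, v1: u32) -> u32 {
        let a = v0 >> 0; // shrl $0x0, %edi (ushr_imm v0, 32 wraps to 0)
        let b = v1 << 0; // shll $0x0, %esi (ishl_imm v1, 0)
        a | b            // orl %esi, %eax
    }

    fn main() {
        assert_eq!(lowered_boundary_case(24, 1), 25); // v0 | v1, not just v0
    }
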
[new test file; path not captured in this view] (22 additions, 0 deletions)

@@ -0,0 +1,22 @@
+(module
+  (func (export "constants") (result i32)
+    i32.const 24
+    i32.const 32
+    i32.shr_u
+    i32.const 1
+    i32.const 0
+    i32.shl
+    i32.or)
+
+  (func (export "variables") (param i32 i32) (result i32)
+    local.get 0
+    i32.const 32
+    i32.shr_u
+    local.get 1
+    i32.const 0
+    i32.shl
+    i32.or)
+)
+
+(assert_return (invoke "constants") (i32.const 25))
+(assert_return (invoke "variables" (i32.const 24) (i32.const 1)) (i32.const 25))
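
Both exported functions encode the same boundary-case computation, and
Wasm's shift semantics (the count is taken modulo 32 for `i32` shifts)
pin the expected answer. As a plain-Rust walk-through of the arithmetic:

    fn main() {
        let shr = 24u32 >> (32 % 32); // i32.shr_u: a count of 32 wraps to 0, giving 24
        let shl = 1u32 << 0;          // i32.shl by 0 leaves 1 unchanged
        assert_eq!(shr | shl, 25);    // i32.or -> 25, matching both assert_returns
    }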

winch/codegen/src/isa/aarch64/masm.rs (4 additions, 2 deletions)

@@ -764,8 +764,10 @@ impl Masm for MacroAssembler {
         size: OperandSize,
     ) -> Result<()> {
         match ImmShift::maybe_from_u64(imm.unwrap_as_u64()) {
-            Some(imml) => self.asm.shift_ir(imml, lhs, dst, kind, size),
-            None => {
+            Some(imml) if imml.value() < size.num_bits() => {
+                self.asm.shift_ir(imml, lhs, dst, kind, size)
+            }
+            _ => {
                 self.with_scratch::<IntScratch, _>(|masm, scratch| {
                     masm.asm.mov_ir(scratch.writable(), imm, imm.size());
                     masm.asm.shift_rrr(scratch.inner(), lhs, dst, kind, size);

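The guard works because AArch64's immediate shift forms can only encode
counts up to width - 1, while the register forms (e.g. `lsrv`) mask the
count modulo the operand width, which is exactly the wrap Wasm
requires. A sketch of that equivalence (plain Rust, not winch-codegen
itself):

    const WIDTH: u32 = 32;

    /// The semantics Wasm requires of `i32.shr_u`: count taken mod 32.
    fn wasm_shr_u(x: u32, count: u32) -> u32 {
        x >> (count % WIDTH)
    }

    /// AArch64 register-form right shift: hardware masks the count
    /// register modulo the operand width, matching Wasm's rule.
    fn aarch64_lsr_reg(x: u32, count: u32) -> u32 {
        x >> (count % WIDTH)
    }

    fn main() {
        // A count of 32 cannot be encoded as an immediate on a 32-bit
        // operation, but routed through a scratch register it wraps to
        // 0 and produces the correct result:
        assert_eq!(wasm_shr_u(24, 32), 24);
        assert_eq!(aarch64_lsr_reg(24, 32), 24);
    }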