Commit c65ba68

x64: Fix lowering rules for `shld` (#12321)

* x64: Fix lowering rules for `shld`

  These rules failed to account for the edge case of shift amounts of 0
  or width(type), where the `shld` instruction is no longer applicable.
  Guards are added to ensure that both shift amounts are strictly
  greater than zero. In theory this shouldn't have much practical
  impact, since shift-by-zero and shift-by-type-width are both
  optimized away in the mid-end; the bug is only exposable at
  opt-level=0, which may help explain why it went undiscovered for
  ~1 year.

  Closes #12318

* Fix Winch's encoding of shift-by-32
1 parent 7981473 commit c65ba68
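
The mismatch is easiest to see outside of ISLE. The following is a
minimal Rust sketch, not Wasmtime code (`clif_pattern` and `shld32` are
ad-hoc reference models), of why the matched pattern stops denoting a
funnel shift at the boundary shift amounts:

    /// What the matched CLIF pattern `(x << xs) | (y >> ys)` computes:
    /// CLIF shifts mask their amount modulo the type width, which
    /// `wrapping_shl`/`wrapping_shr` reproduce for `u32`.
    fn clif_pattern(x: u32, y: u32, xs: u32, ys: u32) -> u32 {
        x.wrapping_shl(xs) | y.wrapping_shr(ys)
    }

    /// Reference semantics of x86 `shld` on 32-bit operands: shift `x`
    /// left by `count`, filling the vacated low bits from the high
    /// bits of `y`; a count of 0 leaves `x` untouched.
    fn shld32(x: u32, y: u32, count: u32) -> u32 {
        if count == 0 {
            x
        } else {
            (x << count) | (y >> (32 - count))
        }
    }

    fn main() {
        let (x, y) = (1u32, 24u32);
        // For interior shift amounts the two agree, so `shld` is a
        // valid lowering:
        assert_eq!(clif_pattern(x, y, 8, 24), shld32(x, y, 8));
        // At the boundary xs = 0, ys = 32 they diverge: the masked
        // shift-by-32 acts as shift-by-0, so the pattern means
        // `x | y` (25), while `shld` by 0 is just `x` (1).
        assert_eq!(clif_pattern(x, y, 0, 32), 25);
        assert_eq!(shld32(x, y, 0), 1);
    }

The new `u64_gt` guards simply exclude those boundary amounts so the
pattern falls through to the generic shift-and-or lowering.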

File tree: 4 files changed (+97 additions, -2 deletions)

cranelift/codegen/src/isa/x64/lower.isle (4 additions, 0 deletions)

@@ -497,10 +497,14 @@
 (rule 8 (lower (has_type (ty_int_ref_16_to_64 ty)
                          (bor (ishl x (u8_from_iconst xs)) (ushr y (u8_from_iconst ys)))))
       (if-let true (u64_eq (ty_bits ty) (u64_wrapping_add xs ys)))
+      (if-let true (u64_gt xs 0))
+      (if-let true (u64_gt ys 0))
       (x64_shld ty x y xs))
 (rule 8 (lower (has_type (ty_int_ref_16_to_64 ty)
                          (bor (ushr y (u8_from_iconst ys)) (ishl x (u8_from_iconst xs)))))
       (if-let true (u64_eq (ty_bits ty) (u64_wrapping_add xs ys)))
+      (if-let true (u64_gt xs 0))
+      (if-let true (u64_gt ys 0))
       (x64_shld ty x y xs))

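Restated in plain Rust (an illustrative predicate, not a Cranelift
helper), the patched rule now fires only when the constant shift
amounts sum to the type width and are both strictly positive:

    // Ad-hoc restatement of the rule's `if-let` guards above.
    fn shld_rule_applies(ty_bits: u64, xs: u64, ys: u64) -> bool {
        ty_bits == xs.wrapping_add(ys) && xs > 0 && ys > 0
    }

    fn main() {
        assert!(shld_rule_applies(32, 8, 24));  // interior case: `shld` is valid
        assert!(!shld_rule_applies(32, 0, 32)); // boundary case now excluded
        assert!(!shld_rule_applies(32, 32, 0)); // ...in either orientation
    }
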
cranelift/filetests/filetests/isa/x64/shld.clif (67 additions, 0 deletions)

@@ -237,3 +237,70 @@ block0(v0: i8, v1: i8):
 ;   popq %rbp
 ;   retq
 
+function %x64_shld_i32_32(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = ushr_imm v0, 32
+    v3 = ishl_imm v1, 0
+    v4 = bor v2, v3
+    return v4
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   shrl $0x0, %edi
+;   shll $0x0, %esi
+;   movq %rdi, %rax
+;   orl %esi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   shrl $0, %edi
+;   shll $0, %esi
+;   movq %rdi, %rax
+;   orl %esi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+
+function %x64_shld_i32_32_swap(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = ishl_imm v1, 0
+    v3 = ushr_imm v0, 32
+    v4 = bor v2, v3
+    return v4
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   shll $0x0, %esi
+;   shrl $0x0, %edi
+;   movq %rsi, %rax
+;   orl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   shll $0, %esi
+;   shrl $0, %edi
+;   movq %rsi, %rax
+;   orl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
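
The new filetest cases pin the boundary lowering to two plain shifts
plus an `or` rather than an `shld`. A quick Rust check (a sketch of the
instruction semantics only, not repository code) confirms that this
sequence matches CLIF's masked-shift meaning:

    // Mirrors the expected VCode: shrl $0x0, shll $0x0, then orl.
    #[allow(clippy::identity_op)]
    fn lowered_boundary_case(v0: u32, v1: u32) -> u32 {
        let a = v0 >> 0; // shrl $0x0, %edi (ushr_imm v0, 32 wraps to 0)
        let b = v1 << 0; // shll $0x0, %esi (ishl_imm v1, 0)
        a | b            // orl %esi, %eax
    }

    fn main() {
        assert_eq!(lowered_boundary_case(24, 1), 25); // v0 | v1, not just v0
    }
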
[new test file; path not captured in this view] (22 additions, 0 deletions)

@@ -0,0 +1,22 @@
+(module
+  (func (export "constants") (result i32)
+    i32.const 24
+    i32.const 32
+    i32.shr_u
+    i32.const 1
+    i32.const 0
+    i32.shl
+    i32.or)
+
+  (func (export "variables") (param i32 i32) (result i32)
+    local.get 0
+    i32.const 32
+    i32.shr_u
+    local.get 1
+    i32.const 0
+    i32.shl
+    i32.or)
+)
+
+(assert_return (invoke "constants") (i32.const 25))
+(assert_return (invoke "variables" (i32.const 24) (i32.const 1)) (i32.const 25))
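
Both exported functions encode the same boundary-case computation, and
Wasm's shift semantics (the count is taken modulo 32 for `i32` shifts)
pin the expected answer. As a plain-Rust walk-through of the arithmetic:

    fn main() {
        let shr = 24u32 >> (32 % 32); // i32.shr_u: a count of 32 wraps to 0, giving 24
        let shl = 1u32 << 0;          // i32.shl by 0 leaves 1 unchanged
        assert_eq!(shr | shl, 25);    // i32.or -> 25, matching both assert_returns
    }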

winch/codegen/src/isa/aarch64/masm.rs (4 additions, 2 deletions)

@@ -764,8 +764,10 @@ impl Masm for MacroAssembler {
         size: OperandSize,
     ) -> Result<()> {
         match ImmShift::maybe_from_u64(imm.unwrap_as_u64()) {
-            Some(imml) => self.asm.shift_ir(imml, lhs, dst, kind, size),
-            None => {
+            Some(imml) if imml.value() < size.num_bits() => {
+                self.asm.shift_ir(imml, lhs, dst, kind, size)
+            }
+            _ => {
                 self.with_scratch::<IntScratch, _>(|masm, scratch| {
                     masm.asm.mov_ir(scratch.writable(), imm, imm.size());
                     masm.asm.shift_rrr(scratch.inner(), lhs, dst, kind, size);

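The guard works because AArch64's immediate shift forms can only encode
counts up to width - 1, while the register forms (e.g. `lsrv`) mask the
count modulo the operand width, which is exactly the wrap Wasm
requires. A sketch of that equivalence (plain Rust, not winch-codegen
itself):

    const WIDTH: u32 = 32;

    /// The semantics Wasm requires of `i32.shr_u`: count taken mod 32.
    fn wasm_shr_u(x: u32, count: u32) -> u32 {
        x >> (count % WIDTH)
    }

    /// AArch64 register-form right shift: hardware masks the count
    /// register modulo the operand width, matching Wasm's rule.
    fn aarch64_lsr_reg(x: u32, count: u32) -> u32 {
        x >> (count % WIDTH)
    }

    fn main() {
        // A count of 32 cannot be encoded as an immediate on a 32-bit
        // operation, but routed through a scratch register it wraps to
        // 0 and produces the correct result:
        assert_eq!(wasm_shr_u(24, 32), 24);
        assert_eq!(aarch64_lsr_reg(24, 32), 24);
    }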