x64: Fix encoding of RIP-relative addressing (#10819)

alexcrichton · abrown · web-flow · commit 83fe01eb1fd2 · 2025-05-20T22:42:23.000Z
* x64: Fix encoding of RIP-relative addressing This commit fixes a bug in the new assembler which was surfaced through the changes in #10782 but was a pre-existing issue. Specifically the encoding of a RIP-relative addressing mode required knowing the number of bytes at the end of an instruction but this was accidentally hardcoded to 0. In #10782 `imul` instructions were added where a RIP-relative address mode can be used in conjunction with an immediate which cause the RIP-relative addressing to load from the wrong address. This bug can in theory affect other instructions in the new assembler as well, but auditing the list of instructions it looks like `imul` is the only one that can possibly have an immediate after a RIP-relative addressing mode. That means that prior instructions using the new assembler should not be affected. * Fix testing the assembler crate * Apply suggestions from code review Co-authored-by: Andrew Brown <andrew.brown@intel.com> --------- Co-authored-by: Andrew Brown <andrew.brown@intel.com>
diff --git a/cranelift/assembler-x64/meta/src/generate/format.rs b/cranelift/assembler-x64/meta/src/generate/format.rs
@@ -148,6 +148,10 @@ impl dsl::Format {
             f.empty_line();
             f.comment("Emit ModR/M byte.");
         }
+        let bytes_at_end = match self.operands_by_kind().as_slice() {
+            [.., Imm(imm)] => imm.bytes(),
+            _ => 0,
+        };
 
         match self.operands_by_kind().as_slice() {
             [FixedReg(_)] | [FixedReg(_), FixedReg(_)] | [FixedReg(_), Imm(_)] => {
@@ -166,7 +170,10 @@ impl dsl::Format {
             | [FixedReg(_), FixedReg(_), RegMem(mem)] => {
                 let digit = rex.digit.unwrap();
                 fmtln!(f, "let digit = 0x{digit:x};");
-                fmtln!(f, "self.{mem}.encode_rex_suffixes(buf, off, digit, 0);");
+                fmtln!(
+                    f,
+                    "self.{mem}.encode_rex_suffixes(buf, off, digit, {bytes_at_end});"
+                );
             }
             [Reg(reg), RegMem(mem)]
             | [Reg(reg), RegMem(mem), Imm(_)]
@@ -175,7 +182,10 @@ impl dsl::Format {
             | [RegMem(mem), Reg(reg), Imm(_)]
             | [RegMem(mem), Reg(reg), FixedReg(_)] => {
                 fmtln!(f, "let reg = self.{reg}.enc();");
-                fmtln!(f, "self.{mem}.encode_rex_suffixes(buf, off, reg, 0);");
+                fmtln!(
+                    f,
+                    "self.{mem}.encode_rex_suffixes(buf, off, reg, {bytes_at_end});"
+                );
             }
             unknown => unimplemented!("unknown pattern: {unknown:?}"),
         }
diff --git a/cranelift/assembler-x64/src/fuzz.rs b/cranelift/assembler-x64/src/fuzz.rs
@@ -5,7 +5,8 @@
 //! unconditionally (use the `fuzz` feature instead).
 
 use crate::{
-    AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, Fixed, Gpr, Inst, NonRspGpr, Registers, Xmm,
+    AmodeOffset, AmodeOffsetPlusKnownOffset, AsReg, CodeSink, Constant, Fixed, Gpr, Inst, Label,
+    NonRspGpr, Registers, TrapCode, Xmm,
 };
 use arbitrary::{Arbitrary, Result, Unstructured};
 use capstone::{Capstone, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, arch::x86};
@@ -42,10 +43,75 @@ pub fn roundtrip(inst: &Inst<FuzzRegs>) {
 /// This will skip any traps or label registrations, but this is fine for the
 /// single-instruction disassembly we're doing here.
 fn assemble(inst: &Inst<FuzzRegs>) -> Vec<u8> {
-    let mut buffer = Vec::new();
+    let mut sink = TestCodeSink::default();
     let offsets: Vec<i32> = Vec::new();
-    inst.encode(&mut buffer, &offsets);
-    buffer
+    inst.encode(&mut sink, &offsets);
+    sink.patch_labels_as_if_they_referred_to_end();
+    sink.buf
+}
+
+#[derive(Default)]
+struct TestCodeSink {
+    buf: Vec<u8>,
+    offsets_using_label: Vec<u32>,
+}
+
+impl TestCodeSink {
+    /// References to labels, e.g. RIP-relative addressing, is stored with an
+    /// adjustment that takes into account the distance from the relative offset
+    /// to the end of the instruction, where the offset is relative to. That
+    /// means that to indeed make the offset relative to the end of the
+    /// instruction, which is what we pretend all labels are bound to, it's
+    /// required that this adjustment is taken into account.
+    ///
+    /// This function will iterate over all labels bound to this code sink and
+    /// pretend the label is found at the end of the `buf`. That means that the
+    /// distance from the label to the end of `buf` minus 4, which is the width
+    /// of the offset, is added to what's already present in the encoding buffer.
+    ///
+    /// This is effectively undoing the `bytes_at_end` adjustment that's part of
+    /// `Amode::RipRelative` addressing.
+    fn patch_labels_as_if_they_referred_to_end(&mut self) {
+        let len = i32::try_from(self.buf.len()).unwrap();
+        for offset in self.offsets_using_label.iter() {
+            let range = self.buf[*offset as usize..].first_chunk_mut::<4>().unwrap();
+            let offset = i32::try_from(*offset).unwrap() + 4;
+            let rel_distance = len - offset;
+            *range = (i32::from_le_bytes(*range) + rel_distance).to_le_bytes();
+        }
+    }
+}
+
+impl CodeSink for TestCodeSink {
+    fn put1(&mut self, v: u8) {
+        self.buf.extend_from_slice(&[v]);
+    }
+
+    fn put2(&mut self, v: u16) {
+        self.buf.extend_from_slice(&v.to_le_bytes());
+    }
+
+    fn put4(&mut self, v: u32) {
+        self.buf.extend_from_slice(&v.to_le_bytes());
+    }
+
+    fn put8(&mut self, v: u64) {
+        self.buf.extend_from_slice(&v.to_le_bytes());
+    }
+
+    fn add_trap(&mut self, _: TrapCode) {}
+
+    fn current_offset(&self) -> u32 {
+        self.buf.len().try_into().unwrap()
+    }
+
+    fn use_label_at_offset(&mut self, offset: u32, _: Label) {
+        self.offsets_using_label.push(offset);
+    }
+
+    fn get_label_for_constant(&mut self, c: Constant) -> Label {
+        Label(c.0)
+    }
 }
 
 /// Building a new `Capstone` each time is suboptimal (TODO).
diff --git a/cranelift/filetests/filetests/isa/x64/mul.clif b/cranelift/filetests/filetests/isa/x64/mul.clif
@@ -622,3 +622,67 @@ block0(v0: i8, v1: i8):
 ;   popq %rbp
 ;   retq
 
+function %imul_and_constant_pool_small_immediate() -> i32 {
+block0:
+    v0 = iconst.i32 7
+    v1 = iconst.i32 0x22222222
+    v2 = imul v0, v1
+    return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   imull $0x7, (%rip), %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   imull $7, 5(%rip), %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;   andb (%rdx), %ah
+;   andb (%rdx), %ah
+;   addb %al, (%rax)
+;   addb %al, (%rax)
+
+function %imul_and_constant_pool_big_immediate() -> i32 {
+block0:
+    v0 = iconst.i32 0x11111111
+    v1 = iconst.i32 0x22222222
+    v2 = imul v0, v1
+    return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   imull $0x11111111, (%rip), %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   imull $0x11111111, 0xa(%rip), %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+;   addb %al, (%rax)
+;   addb %al, (%rax)
+;   addb %ah, (%rdx)
+;   andb (%rdx), %ah
+;   andb (%rax), %al
+;   addb %al, (%rax)
+
diff --git a/cranelift/filetests/filetests/runtests/arithmetic.clif b/cranelift/filetests/filetests/runtests/arithmetic.clif
@@ -556,3 +556,21 @@ block0(v0: i8):
 ; run: %udiv_i8_const(0) == 0
 ; run: %udiv_i8_const(-1) == 1
 ; run: %udiv_i8_const(0xFE) == 1
+
+function %imul_small_constant() -> i32 {
+block0:
+    v0 = iconst.i32 7
+    v1 = iconst.i32 0x22222222
+    v2 = imul v0, v1
+    return v2
+}
+; run: %imul_small_constant() == -286331154
+
+function %imul_big_constant() -> i32 {
+block0:
+    v0 = iconst.i32 0x11111111
+    v1 = iconst.i32 0x22222222
+    v2 = imul v0, v1
+    return v2
+}
+; run: %imul_big_constant() == 248153666