
Commit f58fca7

ZJIT: A64: Use MOVN for small negative immediates
Save a couple of instructions when loading a small negative constant into a register. In fact, MOVN is specified to alias as `mov` in the official disassembly.
1 parent faa6750 commit f58fca7
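MOVN writes the bitwise NOT of its (optionally shifted) 16-bit immediate into the destination register, so any constant whose complement fits in 16 bits now loads in one instruction instead of a movz plus up to three movk. A worked example using the -0x1800 constant from the updated backend test below (assembly spelled out for illustration; disassemblers print the movn through its mov alias):

    Before: movz x0, #0xe800        ; plus movk #0xffff at lsl #16, #32 and #48 -- 4 instructions
    After:  movn x0, #0x17ff        ; x0 = !0x17ff = 0xffff_ffff_ffff_e800 = -0x1800, shown as mov x0, #-0x1800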

3 files changed: +74 -15 lines


zjit/src/asm/arm64/inst/mov.rs

Lines changed: 37 additions & 0 deletions
@@ -2,6 +2,9 @@ use super::super::arg::Sf;
 
 /// Which operation is being performed.
 enum Op {
+    /// A movn operation which inverts the immediate and zeroes out the other bits.
+    MOVN = 0b00,
+
     /// A movz operation which zeroes out the other bits.
     MOVZ = 0b10,
 
@@ -61,6 +64,12 @@ impl Mov {
         Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() }
     }
 
+    /// MOVN
+    /// <https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/MOVN--Move-wide-with-NOT->
+    pub fn movn(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
+        Self { rd, imm16, hw: hw.into(), op: Op::MOVN, sf: num_bits.into() }
+    }
+
     /// MOVZ
     /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en>
     pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
@@ -104,6 +113,34 @@ mod tests {
         assert_eq!(0xf2800f60, result);
     }
 
+    #[test]
+    fn test_movn_unshifted() {
+        let inst = Mov::movn(0, 123, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92800f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_16() {
+        let inst = Mov::movn(0, 123, 16, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92a00f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_32() {
+        let inst = Mov::movn(0, 123, 32, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92c00f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_48() {
+        let inst = Mov::movn(0, 123, 48, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92e00f60, result);
+    }
+
     #[test]
     fn test_movk_shifted_16() {
         let inst = Mov::movk(0, 123, 16, 64);
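For readers cross-checking those expected words, here is a minimal standalone sketch of the move-wide bit layout (sf | opc | 100101 | hw | imm16 | Rd) that the tests above exercise. It is an illustration of the encoding from the ARM manual, not the crate's actual Mov struct, and unlike Mov::movn it takes the raw 2-bit hw field rather than a shift in bits:

// Hand-rolled 64-bit MOVN encoder, used only to sanity-check the expected
// values in the tests above.
fn encode_movn_64(rd: u32, imm16: u32, hw: u32) -> u32 {
    (1 << 31)               // sf = 1: 64-bit variant
        | (0b00 << 29)      // opc = 0b00 selects MOVN (MOVZ is 0b10, MOVK 0b11)
        | (0b100101 << 23)  // fixed move-wide-immediate opcode bits
        | (hw << 21)        // hw: shift amount divided by 16
        | (imm16 << 5)      // the 16-bit immediate
        | rd                // destination register number
}

fn main() {
    assert_eq!(encode_movn_64(0, 123, 0), 0x92800f60); // test_movn_unshifted
    assert_eq!(encode_movn_64(0, 123, 1), 0x92a00f60); // test_movn_shifted_16
    assert_eq!(encode_movn_64(0, 123, 2), 0x92c00f60); // test_movn_shifted_32
    assert_eq!(encode_movn_64(0, 123, 3), 0x92e00f60); // test_movn_shifted_48
}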

zjit/src/asm/arm64/mod.rs

Lines changed: 20 additions & 0 deletions
@@ -716,6 +716,21 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
     cb.write_bytes(&bytes);
 }
 
+/// MOVN - load a register with the complement of a shifted then zero extended 16-bit immediate
+/// <https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/MOVN--Move-wide-with-NOT->
+pub fn movn(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
+    let bytes: [u8; 4] = match (rd, imm16) {
+        (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => {
+            assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
+
+            Mov::movn(rd.reg_no, imm16 as u16, shift, rd.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to movn instruction.")
+    };
+
+    cb.write_bytes(&bytes);
+}
+
 /// MOVZ - move a 16 bit immediate into a register, zero the other bits
 pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
     let bytes: [u8; 4] = match (rd, imm16) {
@@ -1543,6 +1558,11 @@ mod tests {
         check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16));
     }
 
+    #[test]
+    fn test_movn() {
+        check_bytes("600fa092", |cb| movn(cb, X0, A64Opnd::new_uimm(123), 16));
+    }
+
     #[test]
     fn test_movz() {
         check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16));

zjit/src/backend/arm64/mod.rs

Lines changed: 17 additions & 15 deletions
@@ -140,6 +140,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
         // instruction, then we'll use that.
         movz(cb, rd, A64Opnd::new_uimm(current), 0);
         return 1;
+    } else if u16::try_from(!value).is_ok() {
+        // For small negative values, use a single movn
+        movn(cb, rd, A64Opnd::new_uimm(!value), 0);
+        return 1;
     } else if BitmaskImmediate::try_from(current).is_ok() {
         // Otherwise, if the immediate can be encoded
         // with the special bitmask immediate encoding,
@@ -1592,15 +1596,16 @@ mod tests {
 
         // Test values that exercise various types of immediates.
         // - 9 bit displacement for Load/Store
-        // - 12 bit shifted immediate
+        // - 12 bit ADD/SUB shifted immediate
+        // - 16 bit MOV family shifted immediates
         // - bit mask immediates
-        for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x1008, i32::MIN] {
+        for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] {
             let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement);
             asm.lea_into(Opnd::Reg(X0_REG), mem);
         }
 
         asm.compile_with_num_regs(&mut cb, 0);
-        assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d100009dd2e0ffbff2e0ffdff2e0fffff2e063208b00ff9dd2e0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", "
+        assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", "
         0x0: orr x0, xzr, #0x7fffffff
         0x4: add x0, sp, x0
         0x8: mov x0, #8
@@ -1610,18 +1615,15 @@ mod tests {
         0x18: add x0, sp, x0
         0x1c: add x0, sp, #0x208
         0x20: sub x0, sp, #0x208
-        0x24: mov x0, #0xe800
-        0x28: movk x0, #0xffff, lsl #16
-        0x2c: movk x0, #0xffff, lsl #32
-        0x30: movk x0, #0xffff, lsl #48
-        0x34: add x0, sp, x0
-        0x38: mov x0, #0xeff8
-        0x3c: movk x0, #0xffff, lsl #16
-        0x40: movk x0, #0xffff, lsl #32
-        0x44: movk x0, #0xffff, lsl #48
-        0x48: add x0, sp, x0
-        0x4c: orr x0, xzr, #0xffffffff80000000
-        0x50: add x0, sp, x0
+        0x24: mov x0, #-0x1800
+        0x28: add x0, sp, x0
+        0x2c: mov x0, #0xfff8
+        0x30: movk x0, #0xfffe, lsl #16
+        0x34: movk x0, #0xffff, lsl #32
+        0x38: movk x0, #0xffff, lsl #48
+        0x3c: add x0, sp, x0
+        0x40: orr x0, xzr, #0xffffffff80000000
+        0x44: add x0, sp, x0
         ");
     }
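To make the effect of the new branch in emit_load_value concrete, here is a rough, hypothetical cost model. load_cost is not part of the codebase; it ignores the bitmask-immediate ORR shortcut and only mirrors the movz / movn / movz+movk ordering visible in the diff:

// Hypothetical sketch: how many move-wide instructions a 64-bit constant
// needs under the new scheme (bitmask-immediate ORR cases not modelled).
fn load_cost(value: u64) -> usize {
    if u16::try_from(value).is_ok() {
        1 // single movz
    } else if u16::try_from(!value).is_ok() {
        1 // single movn: the branch added by this commit
    } else {
        // movz for the low 16 bits plus one movk per remaining non-zero chunk
        1 + (1..4).filter(|i| (value >> (16 * i)) & 0xffff != 0).count()
    }
}

fn main() {
    assert_eq!(load_cost(8), 1);                      // 0x8:  mov x0, #8
    assert_eq!(load_cost((-0x1800i64) as u64), 1);    // 0x24: mov x0, #-0x1800 (previously 4 instructions)
    assert_eq!(load_cost((-0x10008i64) as u64), 4);   // 0x2c: mov plus three movk, as in the disassembly
}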
