From 3056f39874354cf285fd1a453daf9dec216b377e Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 24 Sep 2025 08:09:01 -0700 Subject: [PATCH 1/5] Add tests. --- llvm/test/CodeGen/AArch64/arm64-movi.ll | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll index c9074c2adbe3c..cc1ba5c9c1536 100644 --- a/llvm/test/CodeGen/AArch64/arm64-movi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll @@ -549,3 +549,58 @@ define i64 @orr_32_eor_64() nounwind { ; CHECK-NEXT: ret ret i64 18446604367017541391 } + +;==--------------------------------------------------------------------------== +; Tests for EOR / EON with MOVN. +;==--------------------------------------------------------------------------== + +define i64 @movn_0_eon() { +; CHECK-LABEL: movn_0_eon: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #43690 // =0xaaaa +; CHECK-NEXT: movk x0, #21845, lsl #32 +; CHECK-NEXT: movk x0, #65535, lsl #48 +; CHECK-NEXT: ret + ret i64 u0xffff55550000aaaa +} + +define i64 @movn_1_eon() { +; CHECK-LABEL: movn_1_eon: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #2863267840 // =0xaaaa0000 +; CHECK-NEXT: movk x0, #65535, lsl #32 +; CHECK-NEXT: movk x0, #21845, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x5555ffffaaaa0000 +} + +define i64 @movn_0_eor() { +; CHECK-LABEL: movn_0_eor: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #21845 // =0x5555 +; CHECK-NEXT: movk x0, #65535, lsl #16 +; CHECK-NEXT: movk x0, #43690, lsl #32 +; CHECK-NEXT: ret + ret i64 u0x0000aaaaffff5555 +} + +define i64 @movn_1_eor() { +; CHECK-LABEL: movn_1_eor: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #65535 // =0xffff +; CHECK-NEXT: movk x0, #21845, lsl #16 +; CHECK-NEXT: movk x0, #43690, lsl #48 +; CHECK-NEXT: ret + ret i64 u0xaaaa00005555ffff +} + +define i64 @movn_movk_eor() { +; CHECK-LABEL: movn_movk_eor: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #43690 // =0xaaaa +; CHECK-NEXT: movk x0, #52428, lsl #16 +; CHECK-NEXT: 
movk x0, #21845, lsl #32 +; CHECK-NEXT: movk x0, #13107, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x33335555ccccaaaa +} From 151b440030f8001ef611eef6c0f7675990c0b5b8 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 24 Sep 2025 08:08:24 -0700 Subject: [PATCH 2/5] [AArch64] Improve expansion of immediates of the form (~w << 32 | w). When one half of a 64-bit immediate corresponds to the negation of the other half, we can use a sequence of MOVN, MOVK and EOR to expand the bottom half of the immediate and replicate its negation to the top half. In the general case, this saves us a MOVK compared to expanding the immediate explicitly. As a refinement, when the bottom half contains a 16-bit chunk of ones, the intermediate MOVK can be omitted. Similarly, when the bottom half contains a chunk of zeros, we can alternatively expand its negation and use an EON to reconstruct the expected result. In either case, this still saves us a MOVK compared to the default expansion. --- llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 58 ++++++++++++++++++- .../AArch64/AArch64ExpandPseudoInsts.cpp | 2 + llvm/test/CodeGen/AArch64/arm64-movi.ll | 25 ++++---- 3 files changed, 69 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp index 98016271a9d00..184c45448dc9a 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp @@ -239,6 +239,57 @@ static bool trySequenceOfOnes(uint64_t UImm, return true; } +// Attempt to expand 64-bit immediate values whose negated upper half match +// the lower half (for example, 0x1234'5678'edcb'a987). 
+// Immediates of this form can generally be expanded via a sequence of +// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate +// the result to the upper half, e.g.: +// mov x0, #-22137 // =0xffffffffffffa987 +// movk x0, #60875, lsl #16 // =0xffffffffedcba987 +// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000 +// =0x12345678edcba987. +// When the lower half contains a 16-bit chunk of ones, such as +// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant. +// Similarly, when it contains a 16-bit chunk of zeros, such as +// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding +// the negation of the lower half and negating the result with an EON, e.g.: +// mov x0, #-43400 // =0xffffffffffff5678 +// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000 +// =0xffffffffffff5678 ^ 0x0000a987ffffffff +// =0xffff56780000a987. +// In any of these cases, the expansion with EOR/EON saves an instruction +// compared to the default expansion based on MOV and MOVKs. +static bool tryCopyWithNegation(uint64_t Imm, + SmallVectorImpl<ImmInsnModel> &Insn) { + // We need the negation of the upper half of Imm to match the lower half. + // Degenerate cases where Imm is a run of ones should be handled separately. + if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm)) + return false; + + const unsigned Mask = 0xffff; + unsigned Opc = AArch64::EORXrs; + + // If we have a chunk of all zeros in the lower half, we can save a MOVK by + // materialising the negated immediate and negating the result with an EON. 
+ if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) { + Opc = AArch64::EONXrs; + Imm = ~Imm; + } + + unsigned Imm0 = Imm & Mask; + unsigned Imm16 = (Imm >> 16) & Mask; + if (Imm0 != Mask) { + Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0}); + if (Imm16 != Mask) + Insn.push_back({AArch64::MOVKXi, Imm16, 16}); + } else { + Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16}); + } + + Insn.push_back({Opc, 0, 32}); + return true; +} + static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) { uint64_t NumOnes = llvm::countr_one(V >> StartPosition); @@ -617,7 +668,12 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize, // FIXME: Add more two-instruction sequences. // Three instruction sequences. - // + + // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register). + // The MOVK can be avoided if Imm contains a zero / one chunk. + if (tryCopyWithNegation(Imm, Insn)) + return; + // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly // the fastest sequence with fast literal generation. 
(If neither MOVK is // part of a fast literal generation pair, it could be slower than the diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 0f4bbfc3d610e..536260afb9482 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, .addImm(I->Op2)); } break; + case AArch64::EONXrs: + case AArch64::EORXrs: case AArch64::ORRWrs: case AArch64::ORRXrs: { Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll index cc1ba5c9c1536..c918f209bfda7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-movi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll @@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind { define i64 @movn_0_eon() { ; CHECK-LABEL: movn_0_eon: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #43690 // =0xaaaa -; CHECK-NEXT: movk x0, #21845, lsl #32 -; CHECK-NEXT: movk x0, #65535, lsl #48 +; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555 +; CHECK-NEXT: eon x0, x0, x0, lsl #32 ; CHECK-NEXT: ret ret i64 u0xffff55550000aaaa } @@ -567,9 +566,8 @@ define i64 @movn_0_eon() { define i64 @movn_1_eon() { ; CHECK-LABEL: movn_1_eon: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #2863267840 // =0xaaaa0000 -; CHECK-NEXT: movk x0, #65535, lsl #32 -; CHECK-NEXT: movk x0, #21845, lsl #48 +; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff +; CHECK-NEXT: eon x0, x0, x0, lsl #32 ; CHECK-NEXT: ret ret i64 u0x5555ffffaaaa0000 } @@ -577,9 +575,8 @@ define i64 @movn_1_eon() { define i64 @movn_0_eor() { ; CHECK-LABEL: movn_0_eor: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #21845 // =0x5555 -; CHECK-NEXT: movk x0, #65535, lsl #16 -; CHECK-NEXT: movk x0, #43690, lsl #32 +; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555 +; CHECK-NEXT: eor x0, x0, x0, lsl #32 ; CHECK-NEXT: ret ret i64 
u0x0000aaaaffff5555 } @@ -587,9 +584,8 @@ define i64 @movn_0_eor() { define i64 @movn_1_eor() { ; CHECK-LABEL: movn_1_eor: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #65535 // =0xffff -; CHECK-NEXT: movk x0, #21845, lsl #16 -; CHECK-NEXT: movk x0, #43690, lsl #48 +; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff +; CHECK-NEXT: eor x0, x0, x0, lsl #32 ; CHECK-NEXT: ret ret i64 u0xaaaa00005555ffff } @@ -597,10 +593,9 @@ define i64 @movn_1_eor() { define i64 @movn_movk_eor() { ; CHECK-LABEL: movn_movk_eor: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #43690 // =0xaaaa +; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa ; CHECK-NEXT: movk x0, #52428, lsl #16 -; CHECK-NEXT: movk x0, #21845, lsl #32 -; CHECK-NEXT: movk x0, #13107, lsl #48 +; CHECK-NEXT: eor x0, x0, x0, lsl #32 ; CHECK-NEXT: ret ret i64 u0x33335555ccccaaaa } From fde8a5fea7f89c24986f56c6160dfc698a8e5e9e Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Thu, 9 Oct 2025 01:43:20 -0700 Subject: [PATCH 3/5] Add separate calls for two/three sequences. --- llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp index 184c45448dc9a..da2b343f26b7c 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp @@ -259,7 +259,7 @@ static bool trySequenceOfOnes(uint64_t UImm, // =0xffff56780000a987. // In any of these cases, the expansion with EOR/EON saves an instruction // compared to the default expansion based on MOV and MOVKs. -static bool tryCopyWithNegation(uint64_t Imm, +static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence, SmallVectorImpl<ImmInsnModel> &Insn) { // We need the negation of the upper half of Imm to match the lower half. // Degenerate cases where Imm is a run of ones should be handled separately. 
@@ -278,6 +278,8 @@ static bool tryCopyWithNegation(uint64_t Imm, unsigned Imm0 = Imm & Mask; unsigned Imm16 = (Imm >> 16) & Mask; + if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence) + return false; if (Imm0 != Mask) { Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0}); if (Imm16 != Mask) @@ -665,15 +667,14 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize, if (tryEorOfLogicalImmediates(UImm, Insn)) return; + // Attempt to use a sequence of MOVN+EOR/EON (shifted register). + if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/false, Insn)) + return; + // FIXME: Add more two-instruction sequences. // Three instruction sequences. - - // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register). - // The MOVK can be avoided if Imm contains a zero / one chunk. - if (tryCopyWithNegation(Imm, Insn)) - return; - + // // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly // the fastest sequence with fast literal generation. (If neither MOVK is // part of a fast literal generation pair, it could be slower than the @@ -697,6 +698,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize, if (BitSize == 64 && trySequenceOfOnes(UImm, Insn)) return; + // Attempt to use a sequence of MOVN+MOVK+EOR (shifted register). + if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn)) + return; + // We found no possible two or three instruction sequence; use the general // four-instruction sequence. expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn); From 39752d3004af95b3ac4783d15e4c9821810e9f99 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Tue, 21 Oct 2025 07:11:36 -0700 Subject: [PATCH 4/5] Test other shift amounts. 
--- llvm/test/CodeGen/AArch64/arm64-movi.ll | 144 ++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll index c918f209bfda7..e1c78ec8c843e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-movi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll @@ -554,8 +554,18 @@ define i64 @orr_32_eor_64() nounwind { ; Tests for EOR / EON with MOVN. ;==--------------------------------------------------------------------------== -define i64 @movn_0_eon() { -; CHECK-LABEL: movn_0_eon: +define i64 @movn_0_eon_lsl_17() { +; CHECK-LABEL: movn_0_eon_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4370 // =0xffffffffffffeeee +; CHECK-NEXT: movk x0, #8738, lsl #16 +; CHECK-NEXT: movk x0, #65534, lsl #32 +; CHECK-NEXT: ret + ret i64 u0xfffffffe2222eeee +} + +define i64 @movn_0_eon_lsl_32() { +; CHECK-LABEL: movn_0_eon_lsl_32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555 ; CHECK-NEXT: eon x0, x0, x0, lsl #32 @@ -563,8 +573,28 @@ define i64 @movn_0_eon() { ret i64 u0xffff55550000aaaa } -define i64 @movn_1_eon() { -; CHECK-LABEL: movn_1_eon: +define i64 @movn_0_eon_lsl_47() { +; CHECK-LABEL: movn_0_eon_lsl_47: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #61166 // =0xeeee +; CHECK-NEXT: movk x0, #32768, lsl #32 +; CHECK-NEXT: movk x0, #34952, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x888880000000eeee +} + +define i64 @movn_1_eon_lsl_17() { +; CHECK-LABEL: movn_1_eon_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #286261248 // =0x11100000 +; CHECK-NEXT: movk x0, #8739, lsl #32 +; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: ret + ret i64 u0xfffe222311100000 +} + +define i64 @movn_1_eon_lsl_32() { +; CHECK-LABEL: movn_1_eon_lsl_32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff ; CHECK-NEXT: eon x0, x0, x0, lsl #32 @@ -572,8 +602,28 @@ define i64 @movn_1_eon() { ret i64 u0x5555ffffaaaa0000 } -define i64 @movn_0_eor() 
{ -; CHECK-LABEL: movn_0_eor: +define i64 @movn_1_eon_lsl_46() { +; CHECK-LABEL: movn_1_eon_lsl_46: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4008509440 // =0xeeed0000 +; CHECK-NEXT: movk x0, #49152, lsl #32 +; CHECK-NEXT: movk x0, #49151, lsl #48 +; CHECK-NEXT: ret + ret i64 u0xbfffc000eeed0000 +} + +define i64 @movn_0_eor_lsl_17() { +; CHECK-LABEL: movn_0_eor_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4369 // =0x1111 +; CHECK-NEXT: movk x0, #56797, lsl #16 +; CHECK-NEXT: movk x0, #1, lsl #32 +; CHECK-NEXT: ret + ret i64 u0x00000001dddd1111 +} + +define i64 @movn_0_eor_lsl_32() { +; CHECK-LABEL: movn_0_eor_lsl_32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555 ; CHECK-NEXT: eor x0, x0, x0, lsl #32 @@ -581,8 +631,28 @@ define i64 @movn_0_eor() { ret i64 u0x0000aaaaffff5555 } -define i64 @movn_1_eor() { -; CHECK-LABEL: movn_1_eor: +define i64 @movn_0_eor_lsl_47() { +; CHECK-LABEL: movn_0_eor_lsl_47: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 +; CHECK-NEXT: movk x0, #32767, lsl #32 +; CHECK-NEXT: movk x0, #30583, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x77777fffffff1111 +} + +define i64 @movn_1_eor_lsl_17() { +; CHECK-LABEL: movn_1_eor_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-286261249 // =0xffffffffeeefffff +; CHECK-NEXT: movk x0, #56796, lsl #32 +; CHECK-NEXT: movk x0, #1, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x0001dddceeefffff +} + +define i64 @movn_1_eor_lsl_32() { +; CHECK-LABEL: movn_1_eor_lsl_32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff ; CHECK-NEXT: eor x0, x0, x0, lsl #32 @@ -590,8 +660,51 @@ define i64 @movn_1_eor() { ret i64 u0xaaaa00005555ffff } -define i64 @movn_movk_eor() { -; CHECK-LABEL: movn_movk_eor: +define i64 @movn_1_eor_lsl_46() { +; CHECK-LABEL: movn_1_eor_lsl_46: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #-4008509441 +; CHECK-NEXT: movk x0, #16383, lsl #32 +; CHECK-NEXT: movk x0, #16384, lsl #48 +; CHECK-NEXT: ret + ret i64 
u0x40003fff1112ffff +} + +define i64 @movn_movk_eon_lsl_17() { +; CHECK-LABEL: movn_movk_eon_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #43399 // =0xa987 +; CHECK-NEXT: movk x0, #16699, lsl #16 +; CHECK-NEXT: movk x0, #9320, lsl #32 +; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: ret + ret i64 u0xfffe2468413ba987 +} + +define i64 @movn_movk_eon_lsl_47() { +; CHECK-LABEL: movn_movk_eon_lsl_47: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #61166 // =0xeeee +; CHECK-NEXT: movk x0, #21554, lsl #16 +; CHECK-NEXT: movk x0, #32768, lsl #32 +; CHECK-NEXT: movk x0, #34952, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x888880005432eeee +} + +define i64 @movn_movk_eor_lsl_17() { +; CHECK-LABEL: movn_movk_eor_lsl_17: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #22136 // =0x5678 +; CHECK-NEXT: movk x0, #48836, lsl #16 +; CHECK-NEXT: movk x0, #56215, lsl #32 +; CHECK-NEXT: movk x0, #1, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x0001db97bec45678 +} + +define i64 @movn_movk_eor_lsl_32() { +; CHECK-LABEL: movn_movk_eor_lsl_32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa ; CHECK-NEXT: movk x0, #52428, lsl #16 @@ -599,3 +712,14 @@ define i64 @movn_movk_eor() { ; CHECK-NEXT: ret ret i64 u0x33335555ccccaaaa } + +define i64 @movn_movk_eor_lsl_47() { +; CHECK-LABEL: movn_movk_eor_lsl_47: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, #4369 // =0x1111 +; CHECK-NEXT: movk x0, #43981, lsl #16 +; CHECK-NEXT: movk x0, #32767, lsl #32 +; CHECK-NEXT: movk x0, #30583, lsl #48 +; CHECK-NEXT: ret + ret i64 u0x77777fffabcd1111 +} From 0a2928939cd0c3193505cb9c9438ec4b56c7a506 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Tue, 14 Oct 2025 04:20:42 -0700 Subject: [PATCH 5/5] Support other shift amounts. 
--- llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 66 +++++++++++-------- llvm/test/CodeGen/AArch64/arm64-movi.ll | 64 ++++++++---------- .../AArch64/srem-seteq-illegal-types.ll | 7 +- 3 files changed, 68 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp index da2b343f26b7c..2530daf6bb9dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp @@ -239,15 +239,18 @@ static bool trySequenceOfOnes(uint64_t UImm, return true; } -// Attempt to expand 64-bit immediate values whose negated upper half match -// the lower half (for example, 0x1234'5678'edcb'a987). -// Immediates of this form can generally be expanded via a sequence of -// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate -// the result to the upper half, e.g.: +// Attempt to expand 64-bit immediate values that consist of shifted negated +// components such as 0x1234'5678'edcb'a987, where the upper half is the +// negation of the lower half. Immediates of this form can generally be +// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by +// an EOR or EON to shift and negate the result to the upper half, for example: // mov x0, #-22137 // =0xffffffffffffa987 // movk x0, #60875, lsl #16 // =0xffffffffedcba987 // eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000 // =0x12345678edcba987. +// The logic extends to other shift amounts in the range [17, 48) (outside that +// range we get runs of ones/zeros that are optimised separately). +// // When the lower half contains a 16-bit chunk of ones, such as // 0x0000'5678'ffff'a987, the intermediate MOVK is redundant. // Similarly, when it contains a 16-bit chunk of zeros, such as @@ -261,35 +264,44 @@ static bool trySequenceOfOnes(uint64_t UImm, // compared to the default expansion based on MOV and MOVKs. 
static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence, SmallVectorImpl<ImmInsnModel> &Insn) { - // We need the negation of the upper half of Imm to match the lower half. // Degenerate cases where Imm is a run of ones should be handled separately. - if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm)) + if (!Imm || llvm::isShiftedMask_64(Imm)) return false; const unsigned Mask = 0xffff; - unsigned Opc = AArch64::EORXrs; - // If we have a chunk of all zeros in the lower half, we can save a MOVK by - // materialising the negated immediate and negating the result with an EON. - if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) { - Opc = AArch64::EONXrs; - Imm = ~Imm; - } + auto tryExpansion = [&](unsigned Opc, uint64_t C, unsigned N) { + assert((C >> 32) == 0xffffffffULL && "Invalid immediate"); + const unsigned Imm0 = C & Mask; + const unsigned Imm16 = (C >> 16) & Mask; + if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence) + return false; + + if (Imm0 != Mask) { + Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0}); + if (Imm16 != Mask) + Insn.push_back({AArch64::MOVKXi, Imm16, 16}); + } else { + Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16}); + } - unsigned Imm0 = Imm & Mask; - unsigned Imm16 = (Imm >> 16) & Mask; - if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence) - return false; - if (Imm0 != Mask) { - Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0}); - if (Imm16 != Mask) - Insn.push_back({AArch64::MOVKXi, Imm16, 16}); - } else { - Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16}); + Insn.push_back({Opc, 0, N}); + return true; + }; + + for (unsigned N = 17; N < 48; ++N) { + // Attempt EOR. + uint64_t C = 0xffffffff00000000ULL | (Imm ^ (Imm << N)); + if ((C ^ (C << N)) == Imm && tryExpansion(AArch64::EORXrs, C, N)) + return true; + + // Attempt EON. 
+ C = 0xffffffff00000000ULL | (Imm ^ ~(~Imm << N)); + if ((C ^ ~(C << N)) == Imm && tryExpansion(AArch64::EONXrs, C, N)) + return true; } - Insn.push_back({Opc, 0, 32}); - return true; + return false; } static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) { @@ -698,7 +710,7 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize, if (BitSize == 64 && trySequenceOfOnes(UImm, Insn)) return; - // Attempt to use a sequence of MOVN+MOVK+EOR (shifted register). + // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register). if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn)) return; diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll index e1c78ec8c843e..c4d33faa4eda4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-movi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll @@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind { define i64 @movn_0_eon_lsl_17() { ; CHECK-LABEL: movn_0_eon_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-4370 // =0xffffffffffffeeee -; CHECK-NEXT: movk x0, #8738, lsl #16 -; CHECK-NEXT: movk x0, #65534, lsl #32 +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 +; CHECK-NEXT: eon x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0xfffffffe2222eeee } @@ -576,9 +575,8 @@ define i64 @movn_0_eon_lsl_32() { define i64 @movn_0_eon_lsl_47() { ; CHECK-LABEL: movn_0_eon_lsl_47: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #61166 // =0xeeee -; CHECK-NEXT: movk x0, #32768, lsl #32 -; CHECK-NEXT: movk x0, #34952, lsl #48 +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 +; CHECK-NEXT: eon x0, x0, x0, lsl #47 ; CHECK-NEXT: ret ret i64 u0x888880000000eeee } @@ -586,9 +584,8 @@ define i64 @movn_0_eon_lsl_47() { define i64 @movn_1_eon_lsl_17() { ; CHECK-LABEL: movn_1_eon_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #286261248 // =0x11100000 -; CHECK-NEXT: movk x0, #8739, lsl #32 -; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: mov x0, #-4008574977 // 
=0xffffffff1111ffff +; CHECK-NEXT: eon x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0xfffe222311100000 } @@ -605,9 +602,8 @@ define i64 @movn_1_eon_lsl_32() { define i64 @movn_1_eon_lsl_46() { ; CHECK-LABEL: movn_1_eon_lsl_46: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #4008509440 // =0xeeed0000 -; CHECK-NEXT: movk x0, #49152, lsl #32 -; CHECK-NEXT: movk x0, #49151, lsl #48 +; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff +; CHECK-NEXT: eon x0, x0, x0, lsl #46 ; CHECK-NEXT: ret ret i64 u0xbfffc000eeed0000 } @@ -615,9 +611,8 @@ define i64 @movn_1_eon_lsl_46() { define i64 @movn_0_eor_lsl_17() { ; CHECK-LABEL: movn_0_eor_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #4369 // =0x1111 -; CHECK-NEXT: movk x0, #56797, lsl #16 -; CHECK-NEXT: movk x0, #1, lsl #32 +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 +; CHECK-NEXT: eor x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0x00000001dddd1111 } @@ -635,8 +630,7 @@ define i64 @movn_0_eor_lsl_47() { ; CHECK-LABEL: movn_0_eor_lsl_47: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 -; CHECK-NEXT: movk x0, #32767, lsl #32 -; CHECK-NEXT: movk x0, #30583, lsl #48 +; CHECK-NEXT: eor x0, x0, x0, lsl #47 ; CHECK-NEXT: ret ret i64 u0x77777fffffff1111 } @@ -644,9 +638,8 @@ define i64 @movn_0_eor_lsl_47() { define i64 @movn_1_eor_lsl_17() { ; CHECK-LABEL: movn_1_eor_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-286261249 // =0xffffffffeeefffff -; CHECK-NEXT: movk x0, #56796, lsl #32 -; CHECK-NEXT: movk x0, #1, lsl #48 +; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff +; CHECK-NEXT: eor x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0x0001dddceeefffff } @@ -663,9 +656,8 @@ define i64 @movn_1_eor_lsl_32() { define i64 @movn_1_eor_lsl_46() { ; CHECK-LABEL: movn_1_eor_lsl_46: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #-4008509441 -; CHECK-NEXT: movk x0, #16383, lsl #32 -; CHECK-NEXT: movk x0, #16384, lsl #48 +; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff +; 
CHECK-NEXT: eor x0, x0, x0, lsl #46 ; CHECK-NEXT: ret ret i64 u0x40003fff1112ffff } @@ -673,10 +665,9 @@ define i64 @movn_1_eor_lsl_46() { define i64 @movn_movk_eon_lsl_17() { ; CHECK-LABEL: movn_movk_eon_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #43399 // =0xa987 -; CHECK-NEXT: movk x0, #16699, lsl #16 -; CHECK-NEXT: movk x0, #9320, lsl #32 -; CHECK-NEXT: movk x0, #65534, lsl #48 +; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678 +; CHECK-NEXT: movk x0, #4660, lsl #16 +; CHECK-NEXT: eon x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0xfffe2468413ba987 } @@ -684,10 +675,9 @@ define i64 @movn_movk_eon_lsl_17() { define i64 @movn_movk_eon_lsl_47() { ; CHECK-LABEL: movn_movk_eon_lsl_47: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #61166 // =0xeeee -; CHECK-NEXT: movk x0, #21554, lsl #16 -; CHECK-NEXT: movk x0, #32768, lsl #32 -; CHECK-NEXT: movk x0, #34952, lsl #48 +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 +; CHECK-NEXT: movk x0, #43981, lsl #16 +; CHECK-NEXT: eon x0, x0, x0, lsl #47 ; CHECK-NEXT: ret ret i64 u0x888880005432eeee } @@ -695,10 +685,9 @@ define i64 @movn_movk_eon_lsl_47() { define i64 @movn_movk_eor_lsl_17() { ; CHECK-LABEL: movn_movk_eor_lsl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #22136 // =0x5678 -; CHECK-NEXT: movk x0, #48836, lsl #16 -; CHECK-NEXT: movk x0, #56215, lsl #32 -; CHECK-NEXT: movk x0, #1, lsl #48 +; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678 +; CHECK-NEXT: movk x0, #4660, lsl #16 +; CHECK-NEXT: eor x0, x0, x0, lsl #17 ; CHECK-NEXT: ret ret i64 u0x0001db97bec45678 } @@ -716,10 +705,9 @@ define i64 @movn_movk_eor_lsl_32() { define i64 @movn_movk_eor_lsl_47() { ; CHECK-LABEL: movn_movk_eor_lsl_47: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x0, #4369 // =0x1111 +; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111 ; CHECK-NEXT: movk x0, #43981, lsl #16 -; CHECK-NEXT: movk x0, #32767, lsl #32 -; CHECK-NEXT: movk x0, #30583, lsl #48 +; CHECK-NEXT: eor x0, x0, x0, lsl #47 ; CHECK-NEXT: ret ret i64 
u0x77777fffabcd1111 } diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll index 884d668157e5f..bd4cc62255439 100644 --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -61,14 +61,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; CHECK-NEXT: sbfx x8, x0, #0, #33 ; CHECK-NEXT: sbfx x10, x1, #0, #33 ; CHECK-NEXT: movk x9, #29127, lsl #16 -; CHECK-NEXT: mov x13, #7281 // =0x1c71 +; CHECK-NEXT: mov x13, #-7282 // =0xffffffffffffe38e ; CHECK-NEXT: sbfx x12, x2, #0, #33 ; CHECK-NEXT: movk x9, #50972, lsl #32 -; CHECK-NEXT: movk x13, #29127, lsl #16 +; CHECK-NEXT: movk x13, #36408, lsl #16 ; CHECK-NEXT: movk x9, #7281, lsl #48 -; CHECK-NEXT: movk x13, #50972, lsl #32 +; CHECK-NEXT: eon x13, x13, x13, lsl #33 ; CHECK-NEXT: smulh x11, x8, x9 -; CHECK-NEXT: movk x13, #7281, lsl #48 ; CHECK-NEXT: smulh x9, x10, x9 ; CHECK-NEXT: smulh x13, x12, x13 ; CHECK-NEXT: add x11, x11, x11, lsr #63