[AArch64] Allow splitting bitmasks for EOR/ORR. #150394

Merged: rj-jesus merged 4 commits into llvm:main from rjj/aarch64-split-eor-orr-bitmask on Aug 7, 2025

Conversation

rj-jesus (Contributor) commented on Jul 24, 2025

This patch extends #149095 for EOR and ORR.

It uses a simple partition scheme to try to find two suitable disjoint
bitmasks that can be used with EOR/ORR to reconstruct the original mask.

Fixes: #148987.
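
For illustration, here is a minimal standalone sketch of the partition scheme, under the assumption that the immediate is nonzero and is not already a single contiguous run of ones (the patch filters both cases out before splitting). The name `splitLowestRun` is hypothetical; the real code lives in `AArch64MIPeepholeOpt.cpp` and additionally checks `AArch64_AM::isLogicalImmediate` on the remainder:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// Split Imm into the mask of its least-significant run of ones (Imm1) and
// the remaining set bits (Imm2), so Imm1 | Imm2 == Imm and Imm1 & Imm2 == 0.
// Precondition: Imm != 0 and Imm has at least two runs of ones.
static void splitLowestRun(uint64_t Imm, uint64_t &Imm1, uint64_t &Imm2) {
  unsigned LowestBitSet = std::countr_zero(Imm);          // skip trailing zeros
  unsigned RunLen = std::countr_one(Imm >> LowestBitSet); // length of lowest run
  Imm1 = ((uint64_t{1} << RunLen) - 1) << LowestBitSet;   // lowest run of ones
  Imm2 = Imm & ~Imm1;                                     // everything above it
  assert((Imm1 | Imm2) == Imm && (Imm1 & Imm2) == 0 && "masks must be disjoint");
}

int main() {
  uint64_t M1, M2;
  splitLowestRun(0x200400, M1, M2); // the constant used in test1_eor below
  assert(M1 == 0x400 && M2 == 0x200000);
  // x ^ 0x200400 can then become: eor x, x, #0x400; eor x, x, #0x200000,
  // provided both halves are valid AArch64 logical immediates (they are here).
}
```

Because `Imm1` is a single contiguous run, it is always a valid logical immediate, so the split succeeds whenever the remainder `Imm2` is one too.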

llvmbot (Member) commented on Jul 24, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Ricardo Jesus (rj-jesus)

Changes

This patch extends #149095 for EOR and ORR.

It uses a simple partition scheme to try to find two suitable disjoint
bitmasks that can be used with EOR/ORR to reconstruct the original mask.

It also restructures the original code to allow reusing its
infrastructure and more easily adding other partition schemes in the
future.

Fixes: #148987.


Full diff: https://github.com/llvm/llvm-project/pull/150394.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+93-26)
  • (renamed) llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll (+172)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index abcd5505f735b..68a6f931c42d8 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,11 +8,11 @@
 //
 // This pass performs below peephole optimizations on MIR level.
 //
-// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
-//    MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
+// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+//    MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
 //
 // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
-//    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
+//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
 //
 // 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
 //    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
@@ -125,8 +125,14 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   template <typename T>
   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
 
+  // Strategy used to split logical immediate bitmasks.
+  enum class SplitStrategy {
+    Intersect,
+    Disjoint,
+  };
   template <typename T>
-  bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
+  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
+                          SplitStrategy Strategy, unsigned OtherOpc = 0);
   bool visitORR(MachineInstr &MI);
   bool visitCSEL(MachineInstr &MI);
   bool visitINSERT(MachineInstr &MI);
@@ -158,14 +164,6 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
 template <typename T>
 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
   T UImm = static_cast<T>(Imm);
-  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
-    return false;
-
-  // If this immediate can be handled by one instruction, do not split it.
-  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
-  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
-  if (Insn.size() == 1)
-    return false;
 
   // The bitmask immediate consists of consecutive ones.  Let's say there is
   // constant 0b00000000001000000000010000000000 which does not consist of
@@ -194,23 +192,72 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
 }
 
 template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
-                                    unsigned OtherOpc) {
-  // Try below transformation.
+static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
+                                    T &Imm2Enc) {
+  // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
+  // two disjoint masks such as 0b00000000011000000000000000000000 and
+  // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
+  // new masks match the original mask.
+  unsigned LowestBitSet = llvm::countr_zero(Imm);
+  unsigned LowestGapBitUnset =
+      LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
+
+  // Create a mask for the least significant group of consecutive ones.
+  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
+              (static_cast<T>(1) << LowestBitSet);
+  // Create a disjoint mask for the remaining ones.
+  T NewImm2 = Imm & ~NewImm1;
+  assert(((NewImm1 & NewImm2) == 0) && "Non-disjoint immediates!");
+
+  if (AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) {
+    assert(((NewImm1 | NewImm2) == Imm) && "Invalid immediates!");
+    Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+    Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+    return true;
+  }
+
+  return false;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
+                                              SplitStrategy Strategy,
+                                              unsigned OtherOpc) {
+  // Try the transformations below.
   //
-  // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
-  // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
+  // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+  // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
   //
   // The mov pseudo instruction could be expanded to multiple mov instructions
   // later. Let's try to split the constant operand of mov instruction into two
-  // bitmask immediates. It makes only two AND instructions instead of multiple
-  // mov + and instructions.
+  // bitmask immediates based on the given split strategy. It makes only two
+  // logical instructions instead of multiple mov + logic instructions.
 
   return splitTwoPartImm<T>(
       MI,
-      [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
-                      T &Imm1) -> std::optional<OpcodePair> {
-        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
+      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
+                                T &Imm1) -> std::optional<OpcodePair> {
+        // If this immediate is already a suitable bitmask, don't do anything.
+        // TODO: Should we just combine the two instructions in this case?
+        if (AArch64_AM::isLogicalImmediate(Imm, RegSize))
+          return std::nullopt;
+
+        // If this immediate can be handled by one instruction, do not split it.
+        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
+        if (Insn.size() == 1)
+          return std::nullopt;
+
+        bool SplitSucc = false;
+        switch (Strategy) {
+        case SplitStrategy::Intersect:
+          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
+          break;
+        case SplitStrategy::Disjoint:
+          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
+          break;
+        }
+        if (SplitSucc)
           return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
         return std::nullopt;
       },
@@ -859,16 +906,36 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
         Changed |= visitINSERT(MI);
         break;
       case AArch64::ANDWrr:
-        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
+        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
+                                                SplitStrategy::Intersect);
         break;
       case AArch64::ANDXrr:
-        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
+        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
+                                                SplitStrategy::Intersect);
         break;
       case AArch64::ANDSWrr:
-        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
+        Changed |= trySplitLogicalImm<uint32_t>(
+            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
         break;
       case AArch64::ANDSXrr:
-        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
+        Changed |= trySplitLogicalImm<uint64_t>(
+            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
+        break;
+      case AArch64::EORWrr:
+        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
+                                                SplitStrategy::Disjoint);
+        break;
+      case AArch64::EORXrr:
+        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
+                                                SplitStrategy::Disjoint);
+        break;
+      case AArch64::ORRWrr:
+        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
+                                                SplitStrategy::Disjoint);
+        break;
+      case AArch64::ORRXrr:
+        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
+                                                SplitStrategy::Disjoint);
         break;
       case AArch64::ORRWrs:
         Changed |= visitORR(MI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
similarity index 71%
rename from llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
rename to llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
index 113eb14ca4803..4db9db9185206 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
@@ -370,3 +370,175 @@ entry:
   %r = select i1 %c, i64 %a, i64 %ands
   ret i64 %r
 }
+
+; Test EOR.
+define i32 @test1_eor(i32 %a) {
+; CHECK-LABEL: test1_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor w8, w0, #0x400
+; CHECK-NEXT:    eor w0, w8, #0x200000
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i32 %a, 2098176
+  ret i32 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_eor(i32 %a) {
+; CHECK-LABEL: test2_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    eor w0, w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i32 %a, 135
+  ret i32 %eor
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i32 @test3_eor(i32 %a) {
+; CHECK-LABEL: test3_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    eor w0, w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i32 %a, 2163712
+  ret i32 %eor
+}
+
+define i64 @test4_eor(i64 %a) {
+; CHECK-LABEL: test4_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x0, #0x400
+; CHECK-NEXT:    eor x0, x8, #0x200000
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i64 %a, 2098176
+  ret i64 %eor
+}
+
+define i64 @test5_eor(i64 %a) {
+; CHECK-LABEL: test5_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x0, #0x4000
+; CHECK-NEXT:    eor x0, x8, #0x200000000
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i64 %a, 8589950976
+  ret i64 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_eor(i64 %a) {
+; CHECK-LABEL: test6_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    eor x0, x0, x8
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i64 %a, 135
+  ret i64 %eor
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i64 @test7_eor(i64 %a) {
+; CHECK-LABEL: test7_eor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    eor x0, x0, x8
+; CHECK-NEXT:    ret
+entry:
+  %eor = xor i64 %a, 2163712
+  ret i64 %eor
+}
+
+; Test ORR.
+define i32 @test1_orr(i32 %a) {
+; CHECK-LABEL: test1_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr w8, w0, #0x400
+; CHECK-NEXT:    orr w0, w8, #0x200000
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i32 %a, 2098176
+  ret i32 %orr
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_orr(i32 %a) {
+; CHECK-LABEL: test2_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    orr w0, w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i32 %a, 135
+  ret i32 %orr
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i32 @test3_orr(i32 %a) {
+; CHECK-LABEL: test3_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    orr w0, w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i32 %a, 2163712
+  ret i32 %orr
+}
+
+define i64 @test4_orr(i64 %a) {
+; CHECK-LABEL: test4_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x0, #0x400
+; CHECK-NEXT:    orr x0, x8, #0x200000
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i64 %a, 2098176
+  ret i64 %orr
+}
+
+define i64 @test5_orr(i64 %a) {
+; CHECK-LABEL: test5_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x0, #0x4000
+; CHECK-NEXT:    orr x0, x8, #0x200000000
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i64 %a, 8589950976
+  ret i64 %orr
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_orr(i64 %a) {
+; CHECK-LABEL: test6_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    orr x0, x0, x8
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i64 %a, 135
+  ret i64 %orr
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i64 @test7_orr(i64 %a) {
+; CHECK-LABEL: test7_orr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    orr x0, x0, x8
+; CHECK-NEXT:    ret
+entry:
+  %orr = or i64 %a, 2163712
+  ret i64 %orr
+}

rj-jesus added a commit to rj-jesus/llvm-project that referenced this pull request Jul 25, 2025
This patch generalises the logic for splitting bitmasks for AND/ANDS
immediate instructions, to prepare it to handle more instructions as
in llvm#150394.
rj-jesus added a commit that referenced this pull request Jul 30, 2025
This patch generalises the logic for splitting bitmasks for AND/ANDS
immediate instructions, to prepare it to handle more opcodes, as in
#150394.
rj-jesus force-pushed the rjj/aarch64-split-eor-orr-bitmask branch from 68a008d to 2c933ed on July 30, 2025 at 09:03
rj-jesus (Contributor, Author) commented:

Rebased on top of #150619.

rj-jesus force-pushed the rjj/aarch64-split-eor-orr-bitmask branch from 4030752 to 9627500 on August 1, 2025 at 07:49
rj-jesus (Contributor, Author) commented on Aug 1, 2025

Rebased to attempt to sort out the CI failures on CodeGen/AMDGPU/fptrunc.f16.ll.

rj-jesus merged commit 565f707 into llvm:main on Aug 7, 2025
9 checks passed
rj-jesus deleted the rjj/aarch64-split-eor-orr-bitmask branch on August 7, 2025 at 09:42
AZero13 (Contributor) commented on Aug 7, 2025

Thank you so much! I have been trying to figure out how to do this for a while, but then other PRs got caught up.

Out of curiosity, have you read https://devblogs.microsoft.com/oldnewthing/20220808-00/?p=106953? It also talks about this.

AZero13 (Contributor) commented on Aug 7, 2025

For what it's worth, I wonder whether the assembler can take care of many of these situations for us, where a constant can be materialized via ADD/SUB against the zero register, or whether we have to do that ourselves.

AZero13 (Contributor) commented on Aug 7, 2025

For example, if we wanted to materialize a number of the form 0xFFFFFFFF`FFFFFXXX, we can do sub Xd, xzr, #imm12.

rj-jesus (Contributor, Author) commented on Aug 7, 2025

Hi @AZero13, I'm glad you found it useful! The link you posted has a few interesting patterns (although the specific one you mentioned seems to be a MOVN).

You might also find #130376 interesting.
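
For the record, a quick arithmetic check of both encodings on a hypothetical value of that form, 0xFFFFFFFFFFFFFABC (the mnemonics in the comments are illustrative):

```cpp
#include <cstdint>

// sub Xd, xzr, #imm12 computes 0 - imm12; the immediate must fit in 12 bits.
static_assert(uint64_t{0} - 0x544 == 0xFFFFFFFFFFFFFABCULL,
              "sub xd, xzr, #0x544");

// movn Xd, #imm16 computes ~imm16; the immediate must fit in 16 bits.
static_assert(~uint64_t{0x543} == 0xFFFFFFFFFFFFFABCULL,
              "movn xd, #0x543");

int main() {}
```

Both reach the same constant, which matches the observation above that this particular shape is already covered by MOVN.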

krishna2803 pushed a commit to krishna2803/llvm-project that referenced this pull request Aug 12, 2025
This patch generalises the logic for splitting bitmasks for AND/ANDS
immediate instructions, to prepare it to handle more opcodes, as in
llvm#150394.
Successfully merging this pull request may close these issues:

[AArch64] Avoid materialising bitwise immediate arguments by decomposing into 2 arguments