Skip to content

Conversation

phoebewang
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 12, 2025

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/158254.diff

6 Files Affected:

  • (modified) llvm/lib/Target/X86/X86CompressEVEX.cpp (+38-10)
  • (modified) llvm/test/CodeGen/X86/apx/add.ll (+12-16)
  • (modified) llvm/test/CodeGen/X86/apx/mul-i1024.ll (+3-3)
  • (modified) llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir (+2-2)
  • (modified) llvm/test/CodeGen/X86/apx/shl.ll (+4-4)
  • (modified) llvm/test/CodeGen/X86/apx/sub.ll (+2-4)
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index 4ea30de78402f..c0c7f5adf06ef 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -174,7 +174,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
   return true;
 }
 
-static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
+static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
+                             const X86Subtarget &ST) {
   uint64_t TSFlags = MI.getDesc().TSFlags;
 
   // Check for EVEX instructions only.
@@ -239,14 +240,14 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return I->NewOpc;
   };
 
-  // Redundant NDD ops cannot be safely compressed if either:
-  // - the legacy op would introduce a partial write that BreakFalseDeps
-  // identified as a potential stall, or
-  // - the op is writing to a subregister of a live register, i.e. the
-  // full (zeroed) result is used.
-  // Both cases are indicated by an implicit def of the superregister.
+  Register Dst = MI.getOperand(0).getReg();
   if (IsRedundantNDD) {
-    Register Dst = MI.getOperand(0).getReg();
+    // Redundant NDD ops cannot be safely compressed if either:
+    // - the legacy op would introduce a partial write that BreakFalseDeps
+    // identified as a potential stall, or
+    // - the op is writing to a subregister of a live register, i.e. the
+    // full (zeroed) result is used.
+    // Both cases are indicated by an implicit def of the superregister.
     if (Dst &&
         (X86::GR16RegClass.contains(Dst) || X86::GR8RegClass.contains(Dst))) {
       Register Super = getX86SubSuperRegister(Dst, 64);
@@ -260,6 +261,33 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
     if (!X86EnableAPXForRelocation)
       assert(!isAddMemInstrWithRelocation(MI) &&
              "Unexpected NDD instruction with relocation!");
+  } else if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND ||
+             Opc == X86::ADD32rr_ND || Opc == X86::ADD64rr_ND) {
+    // Non-redundant NDD ADD can be compressed to LEA when:
+    // - No EGPR register used and
+    // - EFLAGS is dead.
+    if (!usesExtendedRegister(MI) &&
+        MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
+      Register Src1 = MI.getOperand(1).getReg();
+      const MachineOperand &Src2 = MI.getOperand(2);
+      bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND;
+      const MCInstrDesc &NewDesc =
+          ST.getInstrInfo()->get(Is32BitReg ? X86::LEA32r : X86::LEA64r);
+      if (Is32BitReg)
+        Src1 = getX86SubSuperRegister(Src1, 64);
+      MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst)
+                                    .addReg(Src1)
+                                    .addImm(1);
+      if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND)
+        MIB.addReg(0).add(Src2);
+      else if (Is32BitReg)
+        MIB.addReg(getX86SubSuperRegister(Src2.getReg(), 64)).addImm(0);
+      else
+        MIB.add(Src2).addImm(0);
+      MIB.addReg(0);
+      MI.removeFromParent();
+      return true;
+    }
   }
 
   // NonNF -> NF only if it's not a compressible NDD instruction and eflags is
@@ -318,8 +346,8 @@ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineBasicBlock &MBB : MF) {
     // Traverse the basic block.
-    for (MachineInstr &MI : MBB)
-      Changed |= CompressEVEXImpl(MI, ST);
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+      Changed |= CompressEVEXImpl(MI, MBB, ST);
   }
   LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
   return Changed;
diff --git a/llvm/test/CodeGen/X86/apx/add.ll b/llvm/test/CodeGen/X86/apx/add.ll
index 86343811901a9..4ab0edfba7ce8 100644
--- a/llvm/test/CodeGen/X86/apx/add.ll
+++ b/llvm/test/CodeGen/X86/apx/add.ll
@@ -36,12 +36,12 @@ entry:
 define i32 @add32rr(i32 noundef %a, i32 noundef %b) {
 ; CHECK-LABEL: add32rr:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7]
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add32rr:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7]
+; NF-NEXT:    leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i32 %a, %b
@@ -51,12 +51,12 @@ entry:
 define i64 @add64rr(i64 noundef %a, i64 noundef %b) {
 ; CHECK-LABEL: add64rr:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7]
+; CHECK-NEXT:    leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add64rr:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7]
+; NF-NEXT:    leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i64 %a, %b
@@ -145,12 +145,12 @@ entry:
 define i32 @add32ri8(i32 noundef %a) {
 ; CHECK-LABEL: add32ri8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b]
+; CHECK-NEXT:    leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add32ri8:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b]
+; NF-NEXT:    leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i32 %a, 123
@@ -160,12 +160,12 @@ entry:
 define i64 @add64ri8(i64 noundef %a) {
 ; CHECK-LABEL: add64ri8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b]
+; CHECK-NEXT:    leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add64ri8:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b]
+; NF-NEXT:    leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i64 %a, 123
@@ -207,14 +207,12 @@ entry:
 define i32 @add32ri(i32 noundef %a) {
 ; CHECK-LABEL: add32ri:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; CHECK-NEXT:    # imm = 0x1E240
+; CHECK-NEXT:    leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add32ri:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; NF-NEXT:    # imm = 0x1E240
+; NF-NEXT:    leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i32 %a, 123456
@@ -224,14 +222,12 @@ entry:
 define i64 @add64ri(i64 noundef %a) {
 ; CHECK-LABEL: add64ri:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; CHECK-NEXT:    # imm = 0x1E240
+; CHECK-NEXT:    leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: add64ri:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; NF-NEXT:    # imm = 0x1E240
+; NF-NEXT:    leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %add = add i64 %a, 123456
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index a29a92176f432..0bb3b179cc305 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -1613,7 +1613,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
 ; EGPR-NDD-NEXT:    imulq %r23, %rdi
 ; EGPR-NDD-NEXT:    addq %rdi, %rdx
 ; EGPR-NDD-NEXT:    imulq 120(%r22), %r24, %rax
-; EGPR-NDD-NEXT:    addq %rax, %rdx, %r9
+; EGPR-NDD-NEXT:    leaq (%rdx,%rax), %r9
 ; EGPR-NDD-NEXT:    movq 96(%r22), %r20
 ; EGPR-NDD-NEXT:    movq 104(%r22), %rdi
 ; EGPR-NDD-NEXT:    imulq %rdi, %r26, %r10
@@ -1756,7 +1756,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
 ; EGPR-NDD-NEXT:    addq %rsi, %rdx
 ; EGPR-NDD-NEXT:    movq 104(%r15), %r8
 ; EGPR-NDD-NEXT:    imulq %r10, %r8, %rax
-; EGPR-NDD-NEXT:    addq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT:    leaq (%rdx,%rax), %rsi
 ; EGPR-NDD-NEXT:    movq 112(%r15), %rax
 ; EGPR-NDD-NEXT:    imulq %r23, %rax, %r9
 ; EGPR-NDD-NEXT:    mulq %r16
@@ -1793,7 +1793,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
 ; EGPR-NDD-NEXT:    movq %rax, %r9
 ; EGPR-NDD-NEXT:    addq %r8, %rdx
 ; EGPR-NDD-NEXT:    imulq %r16, %r25, %rax
-; EGPR-NDD-NEXT:    addq %rax, %rdx, %r8
+; EGPR-NDD-NEXT:    leaq (%rdx,%rax), %r8
 ; EGPR-NDD-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
 ; EGPR-NDD-NEXT:    imulq %r23, %r24, %r16
 ; EGPR-NDD-NEXT:    movq %r24, %rax
diff --git a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
index 5be5ca8d71947..bfc0120765e53 100644
--- a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
+++ b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
@@ -15,14 +15,14 @@
   define signext i16 @partial_write(ptr %p, i32 %a, i32 %b, i16 signext %x, i16 signext %y) #0 {
   ; RCDEFAULT-LABEL: partial_write:
   ; RCDEFAULT:       # %bb.0: # %entry
-  ; RCDEFAULT-NEXT:    addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
+  ; RCDEFAULT-NEXT:    leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
   ; RCDEFAULT-NEXT:    movl %eax, (%rdi) # encoding: [0x89,0x07]
   ; RCDEFAULT-NEXT:    addw %cx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xc8]
   ; RCDEFAULT-NEXT:    retq # encoding: [0xc3]
   ;
   ; RC1-LABEL: partial_write:
   ; RC1:       # %bb.0: # %entry
-  ; RC1-NEXT:    addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
+  ; RC1-NEXT:    leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
   ; RC1-NEXT:    movl %eax, (%rdi) # encoding: [0x89,0x07]
   ; RC1-NEXT:    addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
   ; RC1-NEXT:    retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/apx/shl.ll b/llvm/test/CodeGen/X86/apx/shl.ll
index 896cd55bc7452..9c6229a483c73 100644
--- a/llvm/test/CodeGen/X86/apx/shl.ll
+++ b/llvm/test/CodeGen/X86/apx/shl.ll
@@ -396,12 +396,12 @@ entry:
 define i32 @shl32r1(i32 noundef %a) {
 ; CHECK-LABEL: shl32r1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
+; CHECK-NEXT:    leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: shl32r1:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff]
+; NF-NEXT:    leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
   %shl = shl i32 %a, 1
@@ -411,12 +411,12 @@ entry:
 define i64 @shl64r1(i64 noundef %a) {
 ; CHECK-LABEL: shl64r1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff]
+; CHECK-NEXT:    leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: shl64r1:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff]
+; NF-NEXT:    leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
   %shl = shl i64 %a, 1
diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll
index d7914577634e7..75ee8cf31dee5 100644
--- a/llvm/test/CodeGen/X86/apx/sub.ll
+++ b/llvm/test/CodeGen/X86/apx/sub.ll
@@ -207,14 +207,12 @@ entry:
 define i32 @sub32ri(i32 noundef %a) {
 ; CHECK-LABEL: sub32ri:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT:    # imm = 0xFFFE1DC0
+; CHECK-NEXT:    leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; NF-LABEL: sub32ri:
 ; NF:       # %bb.0: # %entry
-; NF-NEXT:    {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
-; NF-NEXT:    # imm = 0xFFFE1DC0
+; NF-NEXT:    leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
 ; NF-NEXT:    retq # encoding: [0xc3]
 entry:
     %sub = sub i32 %a, 123456

Copy link
Contributor

@fzou1 fzou1 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@phoebewang phoebewang merged commit 14f2531 into llvm:main Sep 16, 2025
11 checks passed
@phoebewang phoebewang deleted the APX branch September 16, 2025 05:31
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants