diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp index 4ea30de78402f..c0c7f5adf06ef 100644 --- a/llvm/lib/Target/X86/X86CompressEVEX.cpp +++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp @@ -174,7 +174,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { return true; } -static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { +static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB, + const X86Subtarget &ST) { uint64_t TSFlags = MI.getDesc().TSFlags; // Check for EVEX instructions only. @@ -239,14 +240,14 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { return I->NewOpc; }; - // Redundant NDD ops cannot be safely compressed if either: - // - the legacy op would introduce a partial write that BreakFalseDeps - // identified as a potential stall, or - // - the op is writing to a subregister of a live register, i.e. the - // full (zeroed) result is used. - // Both cases are indicated by an implicit def of the superregister. + Register Dst = MI.getOperand(0).getReg(); if (IsRedundantNDD) { - Register Dst = MI.getOperand(0).getReg(); + // Redundant NDD ops cannot be safely compressed if either: + // - the legacy op would introduce a partial write that BreakFalseDeps + // identified as a potential stall, or + // - the op is writing to a subregister of a live register, i.e. the + // full (zeroed) result is used. + // Both cases are indicated by an implicit def of the superregister. 
if (Dst && (X86::GR16RegClass.contains(Dst) || X86::GR8RegClass.contains(Dst))) { Register Super = getX86SubSuperRegister(Dst, 64); @@ -260,6 +261,33 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { if (!X86EnableAPXForRelocation) assert(!isAddMemInstrWithRelocation(MI) && "Unexpected NDD instruction with relocation!"); + } else if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND || + Opc == X86::ADD32rr_ND || Opc == X86::ADD64rr_ND) { + // Non-redundant NDD ADD can be compressed to LEA when: + // - No EGPR register used and + // - EFLAGS is dead (LEA does not write EFLAGS). + if (!usesExtendedRegister(MI) && + MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) { + Register Src1 = MI.getOperand(1).getReg(); + const MachineOperand &Src2 = MI.getOperand(2); + bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND; + const MCInstrDesc &NewDesc = + ST.getInstrInfo()->get(Is32BitReg ? X86::LEA64_32r : X86::LEA64r); + if (Is32BitReg) + Src1 = getX86SubSuperRegister(Src1, 64); + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst) + .addReg(Src1) + .addImm(1); + if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND) + MIB.addReg(0).add(Src2); + else if (Is32BitReg) + MIB.addReg(getX86SubSuperRegister(Src2.getReg(), 64)).addImm(0); + else + MIB.add(Src2).addImm(0); + MIB.addReg(0); + MI.eraseFromParent(); + return true; + } } // NonNF -> NF only if it's not a compressible NDD instruction and eflags is @@ -318,8 +346,8 @@ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) { // Traverse the basic block. 
- for (MachineInstr &MI : MBB) - Changed |= CompressEVEXImpl(MI, ST); + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) + Changed |= CompressEVEXImpl(MI, MBB, ST); } LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";); return Changed; diff --git a/llvm/test/CodeGen/X86/apx/add.ll b/llvm/test/CodeGen/X86/apx/add.ll index 86343811901a9..4ab0edfba7ce8 100644 --- a/llvm/test/CodeGen/X86/apx/add.ll +++ b/llvm/test/CodeGen/X86/apx/add.ll @@ -36,12 +36,12 @@ entry: define i32 @add32rr(i32 noundef %a, i32 noundef %b) { ; CHECK-LABEL: add32rr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7] +; CHECK-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add32rr: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7] +; NF-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, %b @@ -51,12 +51,12 @@ entry: define i64 @add64rr(i64 noundef %a, i64 noundef %b) { ; CHECK-LABEL: add64rr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7] +; CHECK-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add64rr: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7] +; NF-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, %b @@ -145,12 +145,12 @@ entry: define i32 @add32ri8(i32 noundef %a) { ; CHECK-LABEL: add32ri8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b] +; CHECK-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add32ri8: ; NF: # 
%bb.0: # %entry -; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b] +; NF-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, 123 @@ -160,12 +160,12 @@ entry: define i64 @add64ri8(i64 noundef %a) { ; CHECK-LABEL: add64ri8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b] +; CHECK-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add64ri8: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b] +; NF-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, 123 @@ -207,14 +207,12 @@ entry: define i32 @add32ri(i32 noundef %a) { ; CHECK-LABEL: add32ri: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] -; CHECK-NEXT: # imm = 0x1E240 +; CHECK-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add32ri: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00] -; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, 123456 @@ -224,14 +222,12 @@ entry: define i64 @add64ri(i64 noundef %a) { ; CHECK-LABEL: add64ri: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] -; CHECK-NEXT: # imm = 0x1E240 +; CHECK-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: add64ri: ; NF: 
# %bb.0: # %entry -; NF-NEXT: {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00] -; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00] ; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, 123456 diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll index a29a92176f432..0bb3b179cc305 100644 --- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll +++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll @@ -1613,7 +1613,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; EGPR-NDD-NEXT: imulq %r23, %rdi ; EGPR-NDD-NEXT: addq %rdi, %rdx ; EGPR-NDD-NEXT: imulq 120(%r22), %r24, %rax -; EGPR-NDD-NEXT: addq %rax, %rdx, %r9 +; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r9 ; EGPR-NDD-NEXT: movq 96(%r22), %r20 ; EGPR-NDD-NEXT: movq 104(%r22), %rdi ; EGPR-NDD-NEXT: imulq %rdi, %r26, %r10 @@ -1756,7 +1756,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; EGPR-NDD-NEXT: addq %rsi, %rdx ; EGPR-NDD-NEXT: movq 104(%r15), %r8 ; EGPR-NDD-NEXT: imulq %r10, %r8, %rax -; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi +; EGPR-NDD-NEXT: leaq (%rdx,%rax), %rsi ; EGPR-NDD-NEXT: movq 112(%r15), %rax ; EGPR-NDD-NEXT: imulq %r23, %rax, %r9 ; EGPR-NDD-NEXT: mulq %r16 @@ -1793,7 +1793,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; EGPR-NDD-NEXT: movq %rax, %r9 ; EGPR-NDD-NEXT: addq %r8, %rdx ; EGPR-NDD-NEXT: imulq %r16, %r25, %rax -; EGPR-NDD-NEXT: addq %rax, %rdx, %r8 +; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r8 ; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload ; EGPR-NDD-NEXT: imulq %r23, %r24, %r16 ; EGPR-NDD-NEXT: movq %r24, %rax diff --git a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir index 5be5ca8d71947..bfc0120765e53 100644 --- a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir +++ b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir @@ 
-15,14 +15,14 @@ define signext i16 @partial_write(ptr %p, i32 %a, i32 %b, i16 signext %x, i16 signext %y) #0 { ; RCDEFAULT-LABEL: partial_write: ; RCDEFAULT: # %bb.0: # %entry - ; RCDEFAULT-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2] + ; RCDEFAULT-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32] ; RCDEFAULT-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] ; RCDEFAULT-NEXT: addw %cx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xc8] ; RCDEFAULT-NEXT: retq # encoding: [0xc3] ; ; RC1-LABEL: partial_write: ; RC1: # %bb.0: # %entry - ; RC1-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2] + ; RC1-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32] ; RC1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] ; RC1-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8] ; RC1-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/apx/shl.ll b/llvm/test/CodeGen/X86/apx/shl.ll index 896cd55bc7452..9c6229a483c73 100644 --- a/llvm/test/CodeGen/X86/apx/shl.ll +++ b/llvm/test/CodeGen/X86/apx/shl.ll @@ -396,12 +396,12 @@ entry: define i32 @shl32r1(i32 noundef %a) { ; CHECK-LABEL: shl32r1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff] +; CHECK-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: shl32r1: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff] +; NF-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f] ; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i32 %a, 1 @@ -411,12 +411,12 @@ entry: define i64 @shl64r1(i64 noundef %a) { ; CHECK-LABEL: shl64r1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff] +; CHECK-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; 
NF-LABEL: shl64r1: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff] +; NF-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f] ; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i64 %a, 1 diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll index d7914577634e7..75ee8cf31dee5 100644 --- a/llvm/test/CodeGen/X86/apx/sub.ll +++ b/llvm/test/CodeGen/X86/apx/sub.ll @@ -207,14 +207,12 @@ entry: define i32 @sub32ri(i32 noundef %a) { ; CHECK-LABEL: sub32ri: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff] -; CHECK-NEXT: # imm = 0xFFFE1DC0 +; CHECK-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; NF-LABEL: sub32ri: ; NF: # %bb.0: # %entry -; NF-NEXT: {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff] -; NF-NEXT: # imm = 0xFFFE1DC0 +; NF-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff] ; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i32 %a, 123456