Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions llvm/lib/Target/X86/X86CompressEVEX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
return true;
}

static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
const X86Subtarget &ST) {
uint64_t TSFlags = MI.getDesc().TSFlags;

// Check for EVEX instructions only.
Expand Down Expand Up @@ -239,14 +240,14 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
return I->NewOpc;
};

// Redundant NDD ops cannot be safely compressed if either:
// - the legacy op would introduce a partial write that BreakFalseDeps
// identified as a potential stall, or
// - the op is writing to a subregister of a live register, i.e. the
// full (zeroed) result is used.
// Both cases are indicated by an implicit def of the superregister.
Register Dst = MI.getOperand(0).getReg();
if (IsRedundantNDD) {
Register Dst = MI.getOperand(0).getReg();
// Redundant NDD ops cannot be safely compressed if either:
// - the legacy op would introduce a partial write that BreakFalseDeps
// identified as a potential stall, or
// - the op is writing to a subregister of a live register, i.e. the
// full (zeroed) result is used.
// Both cases are indicated by an implicit def of the superregister.
if (Dst &&
(X86::GR16RegClass.contains(Dst) || X86::GR8RegClass.contains(Dst))) {
Register Super = getX86SubSuperRegister(Dst, 64);
Expand All @@ -260,6 +261,33 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
if (!X86EnableAPXForRelocation)
assert(!isAddMemInstrWithRelocation(MI) &&
"Unexpected NDD instruction with relocation!");
} else if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND ||
Opc == X86::ADD32rr_ND || Opc == X86::ADD64rr_ND) {
// Non-redundant NDD ADD can be compressed to LEA when:
// - No EGPR register used and
// - EFLAGS is dead.
if (!usesExtendedRegister(MI) &&
MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
Register Src1 = MI.getOperand(1).getReg();
const MachineOperand &Src2 = MI.getOperand(2);
bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND;
const MCInstrDesc &NewDesc =
ST.getInstrInfo()->get(Is32BitReg ? X86::LEA32r : X86::LEA64r);
if (Is32BitReg)
Src1 = getX86SubSuperRegister(Src1, 64);
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst)
.addReg(Src1)
.addImm(1);
if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND)
MIB.addReg(0).add(Src2);
else if (Is32BitReg)
MIB.addReg(getX86SubSuperRegister(Src2.getReg(), 64)).addImm(0);
else
MIB.add(Src2).addImm(0);
MIB.addReg(0);
MI.removeFromParent();
return true;
}
}

// NonNF -> NF only if it's not a compressible NDD instruction and eflags is
Expand Down Expand Up @@ -318,8 +346,8 @@ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {

for (MachineBasicBlock &MBB : MF) {
// Traverse the basic block.
for (MachineInstr &MI : MBB)
Changed |= CompressEVEXImpl(MI, ST);
for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
Changed |= CompressEVEXImpl(MI, MBB, ST);
}
LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
return Changed;
Expand Down
28 changes: 12 additions & 16 deletions llvm/test/CodeGen/X86/apx/add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ entry:
define i32 @add32rr(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: add32rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7]
; CHECK-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32rr:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7]
; NF-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, %b
Expand All @@ -51,12 +51,12 @@ entry:
define i64 @add64rr(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: add64rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7]
; CHECK-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64rr:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7]
; NF-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, %b
Expand Down Expand Up @@ -145,12 +145,12 @@ entry:
define i32 @add32ri8(i32 noundef %a) {
; CHECK-LABEL: add32ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b]
; CHECK-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b]
; NF-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, 123
Expand All @@ -160,12 +160,12 @@ entry:
define i64 @add64ri8(i64 noundef %a) {
; CHECK-LABEL: add64ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b]
; CHECK-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b]
; NF-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, 123
Expand Down Expand Up @@ -207,14 +207,12 @@ entry:
define i32 @add32ri(i32 noundef %a) {
; CHECK-LABEL: add32ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32ri:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, 123456
Expand All @@ -224,14 +222,12 @@ entry:
define i64 @add64ri(i64 noundef %a) {
; CHECK-LABEL: add64ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64ri:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, 123456
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/apx/mul-i1024.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1613,7 +1613,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: imulq %r23, %rdi
; EGPR-NDD-NEXT: addq %rdi, %rdx
; EGPR-NDD-NEXT: imulq 120(%r22), %r24, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r9
; EGPR-NDD-NEXT: movq 96(%r22), %r20
; EGPR-NDD-NEXT: movq 104(%r22), %rdi
; EGPR-NDD-NEXT: imulq %rdi, %r26, %r10
Expand Down Expand Up @@ -1756,7 +1756,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rsi, %rdx
; EGPR-NDD-NEXT: movq 104(%r15), %r8
; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %rsi
; EGPR-NDD-NEXT: movq 112(%r15), %rax
; EGPR-NDD-NEXT: imulq %r23, %rax, %r9
; EGPR-NDD-NEXT: mulq %r16
Expand Down Expand Up @@ -1793,7 +1793,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rax, %r9
; EGPR-NDD-NEXT: addq %r8, %rdx
; EGPR-NDD-NEXT: imulq %r16, %r25, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r8
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
; EGPR-NDD-NEXT: imulq %r23, %r24, %r16
; EGPR-NDD-NEXT: movq %r24, %rax
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
define signext i16 @partial_write(ptr %p, i32 %a, i32 %b, i16 signext %x, i16 signext %y) #0 {
; RCDEFAULT-LABEL: partial_write:
; RCDEFAULT: # %bb.0: # %entry
; RCDEFAULT-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
; RCDEFAULT-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
; RCDEFAULT-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
; RCDEFAULT-NEXT: addw %cx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xc8]
; RCDEFAULT-NEXT: retq # encoding: [0xc3]
;
; RC1-LABEL: partial_write:
; RC1: # %bb.0: # %entry
; RC1-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
; RC1-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
; RC1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
; RC1-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
; RC1-NEXT: retq # encoding: [0xc3]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/apx/shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,12 @@ entry:
define i32 @shl32r1(i32 noundef %a) {
; CHECK-LABEL: shl32r1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
; CHECK-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: shl32r1:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff]
; NF-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%shl = shl i32 %a, 1
Expand All @@ -411,12 +411,12 @@ entry:
define i64 @shl64r1(i64 noundef %a) {
; CHECK-LABEL: shl64r1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff]
; CHECK-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: shl64r1:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff]
; NF-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%shl = shl i64 %a, 1
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/apx/sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -207,14 +207,12 @@ entry:
define i32 @sub32ri(i32 noundef %a) {
; CHECK-LABEL: sub32ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
; CHECK-NEXT: # imm = 0xFFFE1DC0
; CHECK-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: sub32ri:
; NF: # %bb.0: # %entry
; NF-NEXT: {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
; NF-NEXT: # imm = 0xFFFE1DC0
; NF-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i32 %a, 123456
Expand Down
Loading