Skip to content

Commit 14f2531

Browse files
authored
[X86][APX] Compress non-redundant NDD ADD to LEA (#158254)
1 parent 2837370 commit 14f2531

File tree

6 files changed

+61
-39
lines changed

6 files changed

+61
-39
lines changed

llvm/lib/Target/X86/X86CompressEVEX.cpp

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
174174
return true;
175175
}
176176

177-
static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
177+
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
178+
const X86Subtarget &ST) {
178179
uint64_t TSFlags = MI.getDesc().TSFlags;
179180

180181
// Check for EVEX instructions only.
@@ -239,14 +240,14 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
239240
return I->NewOpc;
240241
};
241242

242-
// Redundant NDD ops cannot be safely compressed if either:
243-
// - the legacy op would introduce a partial write that BreakFalseDeps
244-
// identified as a potential stall, or
245-
// - the op is writing to a subregister of a live register, i.e. the
246-
// full (zeroed) result is used.
247-
// Both cases are indicated by an implicit def of the superregister.
243+
Register Dst = MI.getOperand(0).getReg();
248244
if (IsRedundantNDD) {
249-
Register Dst = MI.getOperand(0).getReg();
245+
// Redundant NDD ops cannot be safely compressed if either:
246+
// - the legacy op would introduce a partial write that BreakFalseDeps
247+
// identified as a potential stall, or
248+
// - the op is writing to a subregister of a live register, i.e. the
249+
// full (zeroed) result is used.
250+
// Both cases are indicated by an implicit def of the superregister.
250251
if (Dst &&
251252
(X86::GR16RegClass.contains(Dst) || X86::GR8RegClass.contains(Dst))) {
252253
Register Super = getX86SubSuperRegister(Dst, 64);
@@ -260,6 +261,33 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
260261
if (!X86EnableAPXForRelocation)
261262
assert(!isAddMemInstrWithRelocation(MI) &&
262263
"Unexpected NDD instruction with relocation!");
264+
} else if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND ||
265+
Opc == X86::ADD32rr_ND || Opc == X86::ADD64rr_ND) {
266+
// Non-redundant NDD ADD can be compressed to LEA when:
267+
// - No EGPR register is used, and
268+
// - EFLAGS is dead.
269+
if (!usesExtendedRegister(MI) &&
270+
MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
271+
Register Src1 = MI.getOperand(1).getReg();
272+
const MachineOperand &Src2 = MI.getOperand(2);
273+
bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND;
274+
const MCInstrDesc &NewDesc =
275+
ST.getInstrInfo()->get(Is32BitReg ? X86::LEA32r : X86::LEA64r);
276+
if (Is32BitReg)
277+
Src1 = getX86SubSuperRegister(Src1, 64);
278+
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst)
279+
.addReg(Src1)
280+
.addImm(1);
281+
if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND)
282+
MIB.addReg(0).add(Src2);
283+
else if (Is32BitReg)
284+
MIB.addReg(getX86SubSuperRegister(Src2.getReg(), 64)).addImm(0);
285+
else
286+
MIB.add(Src2).addImm(0);
287+
MIB.addReg(0);
288+
MI.removeFromParent();
289+
return true;
290+
}
263291
}
264292

265293
// NonNF -> NF only if it's not a compressible NDD instruction and eflags is
@@ -318,8 +346,8 @@ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
318346

319347
for (MachineBasicBlock &MBB : MF) {
320348
// Traverse the basic block.
321-
for (MachineInstr &MI : MBB)
322-
Changed |= CompressEVEXImpl(MI, ST);
349+
for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
350+
Changed |= CompressEVEXImpl(MI, MBB, ST);
323351
}
324352
LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
325353
return Changed;

llvm/test/CodeGen/X86/apx/add.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ entry:
3636
define i32 @add32rr(i32 noundef %a, i32 noundef %b) {
3737
; CHECK-LABEL: add32rr:
3838
; CHECK: # %bb.0: # %entry
39-
; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7]
39+
; CHECK-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
4040
; CHECK-NEXT: retq # encoding: [0xc3]
4141
;
4242
; NF-LABEL: add32rr:
4343
; NF: # %bb.0: # %entry
44-
; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7]
44+
; NF-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
4545
; NF-NEXT: retq # encoding: [0xc3]
4646
entry:
4747
%add = add i32 %a, %b
@@ -51,12 +51,12 @@ entry:
5151
define i64 @add64rr(i64 noundef %a, i64 noundef %b) {
5252
; CHECK-LABEL: add64rr:
5353
; CHECK: # %bb.0: # %entry
54-
; CHECK-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7]
54+
; CHECK-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
5555
; CHECK-NEXT: retq # encoding: [0xc3]
5656
;
5757
; NF-LABEL: add64rr:
5858
; NF: # %bb.0: # %entry
59-
; NF-NEXT: {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7]
59+
; NF-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
6060
; NF-NEXT: retq # encoding: [0xc3]
6161
entry:
6262
%add = add i64 %a, %b
@@ -145,12 +145,12 @@ entry:
145145
define i32 @add32ri8(i32 noundef %a) {
146146
; CHECK-LABEL: add32ri8:
147147
; CHECK: # %bb.0: # %entry
148-
; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b]
148+
; CHECK-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
149149
; CHECK-NEXT: retq # encoding: [0xc3]
150150
;
151151
; NF-LABEL: add32ri8:
152152
; NF: # %bb.0: # %entry
153-
; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b]
153+
; NF-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
154154
; NF-NEXT: retq # encoding: [0xc3]
155155
entry:
156156
%add = add i32 %a, 123
@@ -160,12 +160,12 @@ entry:
160160
define i64 @add64ri8(i64 noundef %a) {
161161
; CHECK-LABEL: add64ri8:
162162
; CHECK: # %bb.0: # %entry
163-
; CHECK-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b]
163+
; CHECK-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
164164
; CHECK-NEXT: retq # encoding: [0xc3]
165165
;
166166
; NF-LABEL: add64ri8:
167167
; NF: # %bb.0: # %entry
168-
; NF-NEXT: {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b]
168+
; NF-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
169169
; NF-NEXT: retq # encoding: [0xc3]
170170
entry:
171171
%add = add i64 %a, 123
@@ -207,14 +207,12 @@ entry:
207207
define i32 @add32ri(i32 noundef %a) {
208208
; CHECK-LABEL: add32ri:
209209
; CHECK: # %bb.0: # %entry
210-
; CHECK-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
211-
; CHECK-NEXT: # imm = 0x1E240
210+
; CHECK-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
212211
; CHECK-NEXT: retq # encoding: [0xc3]
213212
;
214213
; NF-LABEL: add32ri:
215214
; NF: # %bb.0: # %entry
216-
; NF-NEXT: {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
217-
; NF-NEXT: # imm = 0x1E240
215+
; NF-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
218216
; NF-NEXT: retq # encoding: [0xc3]
219217
entry:
220218
%add = add i32 %a, 123456
@@ -224,14 +222,12 @@ entry:
224222
define i64 @add64ri(i64 noundef %a) {
225223
; CHECK-LABEL: add64ri:
226224
; CHECK: # %bb.0: # %entry
227-
; CHECK-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
228-
; CHECK-NEXT: # imm = 0x1E240
225+
; CHECK-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
229226
; CHECK-NEXT: retq # encoding: [0xc3]
230227
;
231228
; NF-LABEL: add64ri:
232229
; NF: # %bb.0: # %entry
233-
; NF-NEXT: {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
234-
; NF-NEXT: # imm = 0x1E240
230+
; NF-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
235231
; NF-NEXT: retq # encoding: [0xc3]
236232
entry:
237233
%add = add i64 %a, 123456

llvm/test/CodeGen/X86/apx/mul-i1024.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,7 +1613,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
16131613
; EGPR-NDD-NEXT: imulq %r23, %rdi
16141614
; EGPR-NDD-NEXT: addq %rdi, %rdx
16151615
; EGPR-NDD-NEXT: imulq 120(%r22), %r24, %rax
1616-
; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
1616+
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r9
16171617
; EGPR-NDD-NEXT: movq 96(%r22), %r20
16181618
; EGPR-NDD-NEXT: movq 104(%r22), %rdi
16191619
; EGPR-NDD-NEXT: imulq %rdi, %r26, %r10
@@ -1756,7 +1756,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
17561756
; EGPR-NDD-NEXT: addq %rsi, %rdx
17571757
; EGPR-NDD-NEXT: movq 104(%r15), %r8
17581758
; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
1759-
; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
1759+
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %rsi
17601760
; EGPR-NDD-NEXT: movq 112(%r15), %rax
17611761
; EGPR-NDD-NEXT: imulq %r23, %rax, %r9
17621762
; EGPR-NDD-NEXT: mulq %r16
@@ -1793,7 +1793,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
17931793
; EGPR-NDD-NEXT: movq %rax, %r9
17941794
; EGPR-NDD-NEXT: addq %r8, %rdx
17951795
; EGPR-NDD-NEXT: imulq %r16, %r25, %rax
1796-
; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
1796+
; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r8
17971797
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
17981798
; EGPR-NDD-NEXT: imulq %r23, %r24, %r16
17991799
; EGPR-NDD-NEXT: movq %r24, %rax

llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
define signext i16 @partial_write(ptr %p, i32 %a, i32 %b, i16 signext %x, i16 signext %y) #0 {
1616
; RCDEFAULT-LABEL: partial_write:
1717
; RCDEFAULT: # %bb.0: # %entry
18-
; RCDEFAULT-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
18+
; RCDEFAULT-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
1919
; RCDEFAULT-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
2020
; RCDEFAULT-NEXT: addw %cx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xc8]
2121
; RCDEFAULT-NEXT: retq # encoding: [0xc3]
2222
;
2323
; RC1-LABEL: partial_write:
2424
; RC1: # %bb.0: # %entry
25-
; RC1-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
25+
; RC1-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
2626
; RC1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
2727
; RC1-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
2828
; RC1-NEXT: retq # encoding: [0xc3]

llvm/test/CodeGen/X86/apx/shl.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,12 +396,12 @@ entry:
396396
define i32 @shl32r1(i32 noundef %a) {
397397
; CHECK-LABEL: shl32r1:
398398
; CHECK: # %bb.0: # %entry
399-
; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
399+
; CHECK-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
400400
; CHECK-NEXT: retq # encoding: [0xc3]
401401
;
402402
; NF-LABEL: shl32r1:
403403
; NF: # %bb.0: # %entry
404-
; NF-NEXT: {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff]
404+
; NF-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
405405
; NF-NEXT: retq # encoding: [0xc3]
406406
entry:
407407
%shl = shl i32 %a, 1
@@ -411,12 +411,12 @@ entry:
411411
define i64 @shl64r1(i64 noundef %a) {
412412
; CHECK-LABEL: shl64r1:
413413
; CHECK: # %bb.0: # %entry
414-
; CHECK-NEXT: addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff]
414+
; CHECK-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
415415
; CHECK-NEXT: retq # encoding: [0xc3]
416416
;
417417
; NF-LABEL: shl64r1:
418418
; NF: # %bb.0: # %entry
419-
; NF-NEXT: {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff]
419+
; NF-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
420420
; NF-NEXT: retq # encoding: [0xc3]
421421
entry:
422422
%shl = shl i64 %a, 1

llvm/test/CodeGen/X86/apx/sub.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,14 +207,12 @@ entry:
207207
define i32 @sub32ri(i32 noundef %a) {
208208
; CHECK-LABEL: sub32ri:
209209
; CHECK: # %bb.0: # %entry
210-
; CHECK-NEXT: addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
211-
; CHECK-NEXT: # imm = 0xFFFE1DC0
210+
; CHECK-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
212211
; CHECK-NEXT: retq # encoding: [0xc3]
213212
;
214213
; NF-LABEL: sub32ri:
215214
; NF: # %bb.0: # %entry
216-
; NF-NEXT: {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
217-
; NF-NEXT: # imm = 0xFFFE1DC0
215+
; NF-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
218216
; NF-NEXT: retq # encoding: [0xc3]
219217
entry:
220218
%sub = sub i32 %a, 123456

0 commit comments

Comments
 (0)