Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 42 additions & 37 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,11 +1283,14 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,

MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
#define CASE_NF(OP) \
case X86::OP: \
case X86::OP##_NF:
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
case X86::SHL8ri:
case X86::SHL16ri: {
CASE_NF(SHL8ri)
CASE_NF(SHL16ri) {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0)
.addImm(1LL << ShAmt)
Expand All @@ -1296,23 +1299,23 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
.addReg(0);
break;
}
case X86::INC8r:
case X86::INC16r:
CASE_NF(INC8r)
CASE_NF(INC16r)
addRegOffset(MIB, InRegLEA, true, 1);
break;
case X86::DEC8r:
case X86::DEC16r:
CASE_NF(DEC8r)
CASE_NF(DEC16r)
addRegOffset(MIB, InRegLEA, true, -1);
break;
case X86::ADD8ri:
CASE_NF(ADD8ri)
CASE_NF(ADD16ri)
case X86::ADD8ri_DB:
case X86::ADD16ri:
case X86::ADD16ri_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
case X86::ADD8rr:
CASE_NF(ADD8rr)
CASE_NF(ADD16rr)
case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
Src2 = MI.getOperand(2).getReg();
Src2SubReg = MI.getOperand(2).getSubReg();
Expand Down Expand Up @@ -1449,7 +1452,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
case X86::SHL64ri: {
CASE_NF(SHL64ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
Expand All @@ -1469,7 +1472,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
.addReg(0);
break;
}
case X86::SHL32ri: {
CASE_NF(SHL32ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
Expand Down Expand Up @@ -1501,20 +1504,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
case X86::SHL8ri:
CASE_NF(SHL8ri)
Is8BitOp = true;
[[fallthrough]];
case X86::SHL16ri: {
CASE_NF(SHL16ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
}
case X86::INC64r:
case X86::INC32r: {
CASE_NF(INC64r)
CASE_NF(INC32r) {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r
unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF)
? X86::LEA64r
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
Expand All @@ -1536,10 +1539,10 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
case X86::DEC64r:
case X86::DEC32r: {
CASE_NF(DEC64r)
CASE_NF(DEC32r) {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r
unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF)
? X86::LEA64r
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);

Expand All @@ -1562,20 +1565,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
case X86::DEC8r:
case X86::INC8r:
CASE_NF(DEC8r)
CASE_NF(INC8r)
Is8BitOp = true;
[[fallthrough]];
case X86::DEC16r:
case X86::INC16r:
CASE_NF(DEC16r)
CASE_NF(INC16r)
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64rr:
CASE_NF(ADD64rr)
CASE_NF(ADD32rr)
case X86::ADD64rr_DB:
case X86::ADD32rr:
case X86::ADD32rr_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_NF ||
MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
Expand Down Expand Up @@ -1620,21 +1624,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
NumRegOperands = 3;
break;
}
case X86::ADD8rr:
CASE_NF(ADD8rr)
case X86::ADD8rr_DB:
Is8BitOp = true;
[[fallthrough]];
case X86::ADD16rr:
CASE_NF(ADD16rr)
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64ri32:
CASE_NF(ADD64ri32)
case X86::ADD64ri32_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(
BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
MI.getOperand(2));
break;
case X86::ADD32ri:
CASE_NF(ADD32ri)
case X86::ADD32ri_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
Expand All @@ -1659,18 +1663,18 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
case X86::ADD8ri:
CASE_NF(ADD8ri)
case X86::ADD8ri_DB:
Is8BitOp = true;
[[fallthrough]];
case X86::ADD16ri:
CASE_NF(ADD16ri)
case X86::ADD16ri_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::SUB8ri:
case X86::SUB16ri:
CASE_NF(SUB8ri)
CASE_NF(SUB16ri)
/// FIXME: Support these similarly to ADD8ri/ADD16ri*.
return nullptr;
case X86::SUB32ri: {
CASE_NF(SUB32ri) {
if (!MI.getOperand(2).isImm())
return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
Expand Down Expand Up @@ -1701,7 +1705,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
break;
}

case X86::SUB64ri32: {
CASE_NF(SUB64ri32) {
if (!MI.getOperand(2).isImm())
return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
Expand Down Expand Up @@ -2034,6 +2038,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
break;
}
}
#undef CASE_NF

if (!NewMI)
return nullptr;
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/X86/apx/nf-regressions.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64 -mattr=+nf -verify-machineinstrs | FileCheck %s

; Regression test for X86InstrInfo::convertToThreeAddress with NF enabled
; (the RUN line passes -mattr=+nf). Per the CHECK lines, %i5 is computed by
; `subq %rax, %rcx`, and %i6 = %i5 + 1 is expected to be emitted as
; `leaq 1(%rcx), %rax`, followed directly by `js` with no separate compare.
define void @convertToThreeAddress(ptr %arg, ptr %arg1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this test be reduced further? Which instruction is converted from NF? LEA at line 10?

Copy link
Contributor Author

@phoebewang phoebewang Mar 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's LEA at line 10.

Reduced branches a lot, manually 😂 Still reproducible: https://godbolt.org/z/6Kx5rM9E9

; CHECK-LABEL: convertToThreeAddress:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movslq (%rdi), %rax
; CHECK-NEXT: movslq (%rsi), %rcx
; CHECK-NEXT: subq %rax, %rcx
; CHECK-NEXT: leaq 1(%rcx), %rax
; CHECK-NEXT: js .LBB0_1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_6: # %bb
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq $1, %rax
; CHECK-NEXT: jg .LBB0_6
; CHECK-NEXT: .LBB0_5: # %bb16
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: # %bb10
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.7: # %bb11
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: jns .LBB0_2
; CHECK-NEXT: jmp .LBB0_5
; CHECK-NEXT: .LBB0_3: # %bb10
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne .LBB0_5
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: # %bb12
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq $1, %rax
; CHECK-NEXT: jg .LBB0_4
; CHECK-NEXT: jmp .LBB0_5
entry:
%i = load i32, ptr %arg, align 4
%i2 = sext i32 %i to i64
%i3 = load i32, ptr %arg1, align 4
%i4 = sext i32 %i3 to i64
%i5 = sub nsw i64 %i4, %i2
; This add is the instruction the CHECK lines expect as `leaq 1(%rcx), %rax`.
%i6 = add nsw i64 %i5, 1
; %i7 tests the sign of %i5; it drives the branches in entry and in bb11.
%i7 = icmp sgt i64 %i5, -1
br i1 %i7, label %bb, label %bb10

bb: ; preds = %bb, %entry
%i8 = phi i64 [ %i6, %entry ], [ poison, %bb ]
%i9 = icmp sgt i64 %i8, 1
br i1 %i9, label %bb, label %bb16

bb10: ; preds = %bb11, %entry
; NOTE(review): the poison switch condition and poison phi inputs look like
; artifacts of manual test reduction -- confirm before editing by hand.
switch i32 poison, label %bb16 [
i32 1, label %bb11
i32 2, label %bb12
]

bb11: ; preds = %bb10
br i1 %i7, label %bb10, label %bb16

bb12: ; preds = %bb14, %bb10
%i13 = phi i64 [ poison, %bb14 ], [ %i6, %bb10 ]
br label %bb14

bb14: ; preds = %bb12
%i15 = icmp sgt i64 %i13, 1
br i1 %i15, label %bb12, label %bb16

bb16: ; preds = %bb14, %bb11, %bb10, %bb
ret void
}
Loading