Skip to content

Commit 1a09581

Browse files
authored
[AArch64][GlobalISel] Be more precise in RegBankSelect for s/uitofp (#154489)
We can generate fpr->fpr instructions for G_SITOFP and G_UITOFP. It was previously marking the instructions as FPR but then generating GPR instructions and introducing a copy.
1 parent 87a1d42 commit 1a09581

File tree

5 files changed

+64
-224
lines changed

5 files changed

+64
-224
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,67 +1102,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
11021102
return true;
11031103
}
11041104

1105-
static unsigned selectIntToFPConvOpc(unsigned GenericOpc, LLT DstTy,
1106-
LLT SrcTy) {
1107-
if (!DstTy.isScalar() || !SrcTy.isScalar())
1108-
return GenericOpc;
1109-
1110-
const unsigned DstSize = DstTy.getSizeInBits();
1111-
const unsigned SrcSize = SrcTy.getSizeInBits();
1112-
1113-
switch (DstSize) {
1114-
case 32:
1115-
switch (SrcSize) {
1116-
case 32:
1117-
switch (GenericOpc) {
1118-
case TargetOpcode::G_SITOFP:
1119-
return AArch64::SCVTFUWSri;
1120-
case TargetOpcode::G_UITOFP:
1121-
return AArch64::UCVTFUWSri;
1122-
default:
1123-
return GenericOpc;
1124-
}
1125-
case 64:
1126-
switch (GenericOpc) {
1127-
case TargetOpcode::G_SITOFP:
1128-
return AArch64::SCVTFUXSri;
1129-
case TargetOpcode::G_UITOFP:
1130-
return AArch64::UCVTFUXSri;
1131-
default:
1132-
return GenericOpc;
1133-
}
1134-
default:
1135-
return GenericOpc;
1136-
}
1137-
case 64:
1138-
switch (SrcSize) {
1139-
case 32:
1140-
switch (GenericOpc) {
1141-
case TargetOpcode::G_SITOFP:
1142-
return AArch64::SCVTFUWDri;
1143-
case TargetOpcode::G_UITOFP:
1144-
return AArch64::UCVTFUWDri;
1145-
default:
1146-
return GenericOpc;
1147-
}
1148-
case 64:
1149-
switch (GenericOpc) {
1150-
case TargetOpcode::G_SITOFP:
1151-
return AArch64::SCVTFUXDri;
1152-
case TargetOpcode::G_UITOFP:
1153-
return AArch64::UCVTFUXDri;
1154-
default:
1155-
return GenericOpc;
1156-
}
1157-
default:
1158-
return GenericOpc;
1159-
}
1160-
default:
1161-
return GenericOpc;
1162-
};
1163-
return GenericOpc;
1164-
}
1165-
11661105
MachineInstr *
11671106
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
11681107
Register False, AArch64CC::CondCode CC,
@@ -3509,21 +3448,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
35093448
return true;
35103449
}
35113450

3512-
case TargetOpcode::G_SITOFP:
3513-
case TargetOpcode::G_UITOFP: {
3514-
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3515-
SrcTy = MRI.getType(I.getOperand(1).getReg());
3516-
const unsigned NewOpc = selectIntToFPConvOpc(Opcode, DstTy, SrcTy);
3517-
if (NewOpc == Opcode)
3518-
return false;
3519-
3520-
I.setDesc(TII.get(NewOpc));
3521-
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3522-
I.setFlags(MachineInstr::NoFPExcept);
3523-
3524-
return true;
3525-
}
3526-
35273451
case TargetOpcode::G_FREEZE:
35283452
return selectCopy(I, TII, MRI, TRI, RBI);
35293453

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "AArch64RegisterBankInfo.h"
1515
#include "AArch64RegisterInfo.h"
16+
#include "AArch64Subtarget.h"
1617
#include "MCTargetDesc/AArch64MCTargetDesc.h"
1718
#include "llvm/ADT/STLExtras.h"
1819
#include "llvm/ADT/SmallVector.h"
@@ -492,7 +493,7 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
492493

493494
bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
494495
const MachineInstr &MI, const MachineRegisterInfo &MRI,
495-
const TargetRegisterInfo &TRI, const unsigned Depth) const {
496+
const AArch64RegisterInfo &TRI, const unsigned Depth) const {
496497
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
497498
return false;
498499

@@ -506,7 +507,7 @@ bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
506507

507508
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
508509
const MachineRegisterInfo &MRI,
509-
const TargetRegisterInfo &TRI,
510+
const AArch64RegisterInfo &TRI,
510511
unsigned Depth) const {
511512
unsigned Op = MI.getOpcode();
512513
if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
@@ -544,7 +545,7 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
544545

545546
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
546547
const MachineRegisterInfo &MRI,
547-
const TargetRegisterInfo &TRI,
548+
const AArch64RegisterInfo &TRI,
548549
unsigned Depth) const {
549550
switch (MI.getOpcode()) {
550551
case TargetOpcode::G_FPTOSI:
@@ -582,7 +583,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
582583

583584
bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
584585
const MachineRegisterInfo &MRI,
585-
const TargetRegisterInfo &TRI,
586+
const AArch64RegisterInfo &TRI,
586587
unsigned Depth) const {
587588
switch (MI.getOpcode()) {
588589
case AArch64::G_DUP:
@@ -618,6 +619,19 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
618619
return hasFPConstraints(MI, MRI, TRI, Depth);
619620
}
620621

622+
bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
623+
const MachineRegisterInfo &MRI,
624+
const AArch64RegisterInfo &TRI,
625+
unsigned Depth) const {
626+
switch (MI.getOpcode()) {
627+
case TargetOpcode::G_SITOFP:
628+
case TargetOpcode::G_UITOFP:
629+
return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
630+
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
631+
}
632+
return onlyDefinesFP(MI, MRI, TRI, Depth);
633+
}
634+
621635
bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
622636
// GMemOperation because we also want to match indexed loads.
623637
auto *MemOp = cast<GMemOperation>(&MI);
@@ -671,8 +685,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
671685

672686
const MachineFunction &MF = *MI.getParent()->getParent();
673687
const MachineRegisterInfo &MRI = MF.getRegInfo();
674-
const TargetSubtargetInfo &STI = MF.getSubtarget();
675-
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
688+
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
689+
const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
676690

677691
switch (Opc) {
678692
// G_{F|S|U}REM are not listed because they are not legal.
@@ -826,7 +840,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
826840
// Integer to FP conversions don't necessarily happen between GPR -> FPR
827841
// regbanks. They can also be done within an FPR register.
828842
Register SrcReg = MI.getOperand(1).getReg();
829-
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
843+
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
844+
MRI.getType(SrcReg).getSizeInBits() ==
845+
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
830846
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
831847
else
832848
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
@@ -895,13 +911,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
895911
// instruction.
896912
//
897913
// Int->FP conversion operations are also captured in
898-
// onlyDefinesFP().
914+
// prefersFPUse().
899915

900916
if (isPHIWithFPConstraints(UseMI, MRI, TRI))
901917
return true;
902918

903919
return onlyUsesFP(UseMI, MRI, TRI) ||
904-
onlyDefinesFP(UseMI, MRI, TRI);
920+
prefersFPUse(UseMI, MRI, TRI);
905921
}))
906922
OpRegBankIdx[0] = PMI_FirstFPR;
907923
break;

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
namespace llvm {
2323

2424
class TargetRegisterInfo;
25+
class AArch64RegisterInfo;
2526

2627
class AArch64GenRegisterBankInfo : public RegisterBankInfo {
2728
protected:
@@ -123,21 +124,26 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
123124
/// \returns true if \p MI is a PHI that its def is used by
124125
/// any instruction that onlyUsesFP.
125126
bool isPHIWithFPConstraints(const MachineInstr &MI,
126-
const MachineRegisterInfo &MRI,
127-
const TargetRegisterInfo &TRI,
128-
unsigned Depth = 0) const;
127+
const MachineRegisterInfo &MRI,
128+
const AArch64RegisterInfo &TRI,
129+
unsigned Depth = 0) const;
129130

130131
/// \returns true if \p MI only uses and defines FPRs.
131132
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
132-
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
133+
const AArch64RegisterInfo &TRI,
134+
unsigned Depth = 0) const;
133135

134136
/// \returns true if \p MI only uses FPRs.
135137
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
136-
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
138+
const AArch64RegisterInfo &TRI, unsigned Depth = 0) const;
137139

138140
/// \returns true if \p MI only defines FPRs.
139141
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
140-
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
142+
const AArch64RegisterInfo &TRI, unsigned Depth = 0) const;
143+
144+
/// \returns true if \p MI can take both fpr and gpr uses, but prefers fp.
145+
bool prefersFPUse(const MachineInstr &MI, const MachineRegisterInfo &MRI,
146+
const AArch64RegisterInfo &TRI, unsigned Depth = 0) const;
141147

142148
/// \returns true if the load \p MI is likely loading from a floating-point
143149
/// type.

llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -357,31 +357,6 @@ body: |
357357
$d0 = COPY %1(s64)
358358
...
359359

360-
---
361-
name: sitofp_s64_s32_fpr_both
362-
legalized: true
363-
regBankSelected: true
364-
365-
registers:
366-
- { id: 0, class: fpr }
367-
- { id: 1, class: fpr }
368-
369-
body: |
370-
bb.0:
371-
liveins: $s0
372-
373-
; CHECK-LABEL: name: sitofp_s64_s32_fpr_both
374-
; CHECK: liveins: $s0
375-
; CHECK-NEXT: {{ $}}
376-
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
377-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
378-
; CHECK-NEXT: [[SCVTFUWDri:%[0-9]+]]:fpr64 = nofpexcept SCVTFUWDri [[COPY1]]
379-
; CHECK-NEXT: $d0 = COPY [[SCVTFUWDri]]
380-
%0(s32) = COPY $s0
381-
%1(s64) = G_SITOFP %0
382-
$d0 = COPY %1(s64)
383-
...
384-
385360
---
386361
name: sitofp_s64_s64_fpr
387362
legalized: true

0 commit comments

Comments
 (0)